From 0820041e1daa158f39ab2e414d03c7eb2d1260d7 Mon Sep 17 00:00:00 2001 From: Michal Gorny Date: Fri, 4 Oct 2019 20:30:02 +0000 Subject: [PATCH 001/254] [clang-tools-extra] [cmake] Link against libclang-cpp whenever possible Use clang_target_link_libraries() in order to support linking against libclang-cpp instead of static libraries. Differential Revision: https://reviews.llvm.org/D68448 llvm-svn: 373786 --- .../clang-apply-replacements/tool/CMakeLists.txt | 7 +++++-- .../clang-change-namespace/tool/CMakeLists.txt | 7 +++++-- clang-tools-extra/clang-doc/tool/CMakeLists.txt | 7 +++++-- .../find-all-symbols/tool/CMakeLists.txt | 5 ++++- .../clang-include-fixer/tool/CMakeLists.txt | 7 +++++-- clang-tools-extra/clang-move/tool/CMakeLists.txt | 7 +++++-- clang-tools-extra/clang-query/tool/CMakeLists.txt | 7 +++++-- .../clang-reorder-fields/tool/CMakeLists.txt | 7 +++++-- clang-tools-extra/clang-tidy/CMakeLists.txt | 2 +- clang-tools-extra/clang-tidy/tool/CMakeLists.txt | 7 +++++-- clang-tools-extra/clangd/fuzzer/CMakeLists.txt | 7 +++++-- .../clangd/index/dex/dexp/CMakeLists.txt | 5 ++++- clang-tools-extra/clangd/indexer/CMakeLists.txt | 7 +++++-- clang-tools-extra/clangd/tool/CMakeLists.txt | 9 ++++++--- clang-tools-extra/clangd/unittests/CMakeLists.txt | 9 ++++++--- .../clangd/xpc/test-client/CMakeLists.txt | 7 +++++-- clang-tools-extra/modularize/CMakeLists.txt | 2 +- clang-tools-extra/pp-trace/CMakeLists.txt | 2 +- clang-tools-extra/tool-template/CMakeLists.txt | 2 +- .../unittests/clang-apply-replacements/CMakeLists.txt | 7 +++++-- .../unittests/clang-change-namespace/CMakeLists.txt | 7 +++++-- clang-tools-extra/unittests/clang-doc/CMakeLists.txt | 7 +++++-- .../unittests/clang-include-fixer/CMakeLists.txt | 7 +++++-- .../find-all-symbols/CMakeLists.txt | 5 ++++- clang-tools-extra/unittests/clang-move/CMakeLists.txt | 7 +++++-- .../unittests/clang-query/CMakeLists.txt | 7 +++++-- clang-tools-extra/unittests/clang-tidy/CMakeLists.txt | 11 +++++++---- 27 files 
changed, 120 insertions(+), 51 deletions(-) diff --git a/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt b/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt index d15a8b1aff274..1ed734c114339 100644 --- a/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt @@ -5,12 +5,15 @@ set(LLVM_LINK_COMPONENTS add_clang_tool(clang-apply-replacements ClangApplyReplacementsMain.cpp ) -target_link_libraries(clang-apply-replacements +clang_target_link_libraries(clang-apply-replacements PRIVATE - clangApplyReplacements clangBasic clangFormat clangRewrite clangToolingCore clangToolingRefactoring ) +target_link_libraries(clang-apply-replacements + PRIVATE + clangApplyReplacements + ) diff --git a/clang-tools-extra/clang-change-namespace/tool/CMakeLists.txt b/clang-tools-extra/clang-change-namespace/tool/CMakeLists.txt index 702bad368fa90..ae48a5e0f798e 100644 --- a/clang-tools-extra/clang-change-namespace/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-change-namespace/tool/CMakeLists.txt @@ -7,12 +7,11 @@ set(LLVM_LINK_COMPONENTS add_clang_tool(clang-change-namespace ClangChangeNamespace.cpp ) -target_link_libraries(clang-change-namespace +clang_target_link_libraries(clang-change-namespace PRIVATE clangAST clangASTMatchers clangBasic - clangChangeNamespace clangFormat clangFrontend clangRewrite @@ -20,3 +19,7 @@ target_link_libraries(clang-change-namespace clangTooling clangToolingCore ) +target_link_libraries(clang-change-namespace + PRIVATE + clangChangeNamespace + ) diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt index de8c9bcbffa2b..7e71478869160 100644 --- a/clang-tools-extra/clang-doc/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt @@ -4,16 +4,19 @@ add_clang_tool(clang-doc ClangDocMain.cpp ) -target_link_libraries(clang-doc +clang_target_link_libraries(clang-doc PRIVATE clangAST 
clangASTMatchers clangBasic clangFrontend - clangDoc clangTooling clangToolingCore ) +target_link_libraries(clang-doc + PRIVATE + clangDoc + ) install(FILES ../assets/clang-doc-default-stylesheet.css DESTINATION share/clang diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt index 7f101ebd953bb..8f5509d22e24a 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt @@ -4,7 +4,7 @@ add_clang_tool(find-all-symbols FindAllSymbolsMain.cpp ) -target_link_libraries(find-all-symbols +clang_target_link_libraries(find-all-symbols PRIVATE clangAST clangASTMatchers @@ -13,6 +13,9 @@ target_link_libraries(find-all-symbols clangLex clangSerialization clangTooling + ) +target_link_libraries(find-all-symbols + PRIVATE findAllSymbols ) diff --git a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt index 5b600a4639e72..3936ac1e8a5a5 100644 --- a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt @@ -4,16 +4,19 @@ add_clang_tool(clang-include-fixer ClangIncludeFixer.cpp ) -target_link_libraries(clang-include-fixer +clang_target_link_libraries(clang-include-fixer PRIVATE clangBasic clangFormat clangFrontend - clangIncludeFixer clangRewrite clangSerialization clangTooling clangToolingCore + ) +target_link_libraries(clang-include-fixer + PRIVATE + clangIncludeFixer findAllSymbols ) diff --git a/clang-tools-extra/clang-move/tool/CMakeLists.txt b/clang-tools-extra/clang-move/tool/CMakeLists.txt index b6051e4fadfed..a0c9c20d70be1 100644 --- a/clang-tools-extra/clang-move/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-move/tool/CMakeLists.txt @@ -4,16 +4,19 @@ add_clang_tool(clang-move ClangMove.cpp ) 
-target_link_libraries(clang-move +clang_target_link_libraries(clang-move PRIVATE clangAST clangASTMatchers clangBasic clangFormat clangFrontend - clangMove clangRewrite clangSerialization clangTooling clangToolingCore ) +target_link_libraries(clang-move + PRIVATE + clangMove + ) diff --git a/clang-tools-extra/clang-query/tool/CMakeLists.txt b/clang-tools-extra/clang-query/tool/CMakeLists.txt index 7071c94cffc6e..be83a7c085646 100644 --- a/clang-tools-extra/clang-query/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-query/tool/CMakeLists.txt @@ -3,14 +3,17 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) add_clang_tool(clang-query ClangQuery.cpp ) -target_link_libraries(clang-query +clang_target_link_libraries(clang-query PRIVATE clangAST clangASTMatchers clangBasic clangDynamicASTMatchers clangFrontend - clangQuery clangSerialization clangTooling ) +target_link_libraries(clang-query + PRIVATE + clangQuery + ) diff --git a/clang-tools-extra/clang-reorder-fields/tool/CMakeLists.txt b/clang-tools-extra/clang-reorder-fields/tool/CMakeLists.txt index 718ee960a61b3..b414f4f4da99f 100644 --- a/clang-tools-extra/clang-reorder-fields/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-reorder-fields/tool/CMakeLists.txt @@ -2,13 +2,16 @@ add_clang_tool(clang-reorder-fields ClangReorderFields.cpp ) -target_link_libraries(clang-reorder-fields +clang_target_link_libraries(clang-reorder-fields PRIVATE clangBasic clangFrontend - clangReorderFields clangRewrite clangSerialization clangTooling clangToolingCore ) +target_link_libraries(clang-reorder-fields + PRIVATE + clangReorderFields + ) diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 6dadb27177119..8e747b32e76c2 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -31,7 +31,7 @@ add_clang_library(clangTidy ) if(CLANG_ENABLE_STATIC_ANALYZER) - target_link_libraries(clangTidy PRIVATE + 
clang_target_link_libraries(clangTidy PRIVATE clangStaticAnalyzerCore clangStaticAnalyzerFrontend ) diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt index fc2b4ebd3b410..073749a7d8363 100644 --- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt @@ -11,14 +11,17 @@ add_clang_tool(clang-tidy add_dependencies(clang-tidy clang-resource-headers ) -target_link_libraries(clang-tidy +clang_target_link_libraries(clang-tidy PRIVATE clangAST clangASTMatchers clangBasic - clangTidy clangTooling clangToolingCore + ) +target_link_libraries(clang-tidy + PRIVATE + clangTidy ${ALL_CLANG_TIDY_CHECKS} ) diff --git a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt index 28191a3f5605f..90379822ac661 100644 --- a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt +++ b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt @@ -12,13 +12,16 @@ add_llvm_fuzzer(clangd-fuzzer DUMMY_MAIN DummyClangdMain.cpp ) -target_link_libraries(clangd-fuzzer +clang_target_link_libraries(clangd-fuzzer PRIVATE clangBasic - clangDaemon clangFormat clangFrontend clangSema clangTooling clangToolingCore ) +target_link_libraries(clangd-fuzzer + PRIVATE + clangDaemon + ) diff --git a/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt b/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt index ece339d702c62..a4edbb372a761 100644 --- a/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt @@ -9,8 +9,11 @@ add_clang_executable(dexp Dexp.cpp ) -target_link_libraries(dexp +clang_target_link_libraries(dexp PRIVATE clangBasic + ) +target_link_libraries(dexp + PRIVATE clangDaemon ) diff --git a/clang-tools-extra/clangd/indexer/CMakeLists.txt b/clang-tools-extra/clangd/indexer/CMakeLists.txt index 92aae0643ce87..edbced1410bb8 100644 --- a/clang-tools-extra/clangd/indexer/CMakeLists.txt +++ 
b/clang-tools-extra/clangd/indexer/CMakeLists.txt @@ -8,13 +8,16 @@ add_clang_executable(clangd-indexer IndexerMain.cpp ) -target_link_libraries(clangd-indexer +clang_target_link_libraries(clangd-indexer PRIVATE clangAST clangBasic - clangDaemon clangFrontend clangIndex clangLex clangTooling ) +target_link_libraries(clangd-indexer + PRIVATE + clangDaemon +) diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index 085ede378268a..9ebbe5f04cd56 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -15,12 +15,10 @@ if(CLANGD_BUILD_XPC) list(APPEND CLANGD_XPC_LIBS "clangdXpcJsonConversions" "clangdXpcTransport") endif() -target_link_libraries(clangd +clang_target_link_libraries(clangd PRIVATE clangAST clangBasic - clangTidy - clangDaemon clangFormat clangFrontend clangSema @@ -28,5 +26,10 @@ target_link_libraries(clangd clangToolingCore clangToolingRefactoring clangToolingSyntax + ) +target_link_libraries(clangd + PRIVATE + clangTidy + clangDaemon ${CLANGD_XPC_LIBS} ) diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index d25745f94c7b9..7e298b6ad0537 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -78,23 +78,26 @@ add_unittest(ClangdUnitTests ClangdTests $ ) -target_link_libraries(ClangdTests +clang_target_link_libraries(ClangdTests PRIVATE clangAST clangBasic - clangDaemon clangFormat clangFrontend clangIndex clangLex clangSema clangSerialization - clangTidy clangTooling clangToolingCore clangToolingInclusions clangToolingRefactoring clangToolingSyntax + ) +target_link_libraries(ClangdTests + PRIVATE + clangDaemon + clangTidy LLVMSupport LLVMTestingSupport ) diff --git a/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt b/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt index 283599ecce77c..1bf01c63d7224 
100644 --- a/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt @@ -13,14 +13,17 @@ set(LLVM_LINK_COMPONENTS support ) -target_link_libraries(clangd-xpc-test-client +clang_target_link_libraries(clangd-xpc-test-client PRIVATE clangBasic - clangDaemon clangFormat clangFrontend clangSema clangTooling clangToolingCore +) +target_link_libraries(clangd-xpc-test-client + PRIVATE + clangDaemon clangdXpcJsonConversions ) diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt index fa2c0e5346eb4..4caae81c49b62 100644 --- a/clang-tools-extra/modularize/CMakeLists.txt +++ b/clang-tools-extra/modularize/CMakeLists.txt @@ -11,7 +11,7 @@ add_clang_tool(modularize PreprocessorTracker.cpp ) -target_link_libraries(modularize +clang_target_link_libraries(modularize PRIVATE clangAST clangBasic diff --git a/clang-tools-extra/pp-trace/CMakeLists.txt b/clang-tools-extra/pp-trace/CMakeLists.txt index 11b45ac638a17..be1d9715cf26d 100644 --- a/clang-tools-extra/pp-trace/CMakeLists.txt +++ b/clang-tools-extra/pp-trace/CMakeLists.txt @@ -7,7 +7,7 @@ add_clang_tool(pp-trace PPCallbacksTracker.cpp ) -target_link_libraries(pp-trace +clang_target_link_libraries(pp-trace PRIVATE clangAST clangBasic diff --git a/clang-tools-extra/tool-template/CMakeLists.txt b/clang-tools-extra/tool-template/CMakeLists.txt index 9a304d4344ee8..959bd3d1ac932 100644 --- a/clang-tools-extra/tool-template/CMakeLists.txt +++ b/clang-tools-extra/tool-template/CMakeLists.txt @@ -6,7 +6,7 @@ add_clang_executable(tool-template ToolTemplate.cpp ) -target_link_libraries(tool-template +clang_target_link_libraries(tool-template PRIVATE clangAST clangASTMatchers diff --git a/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt b/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt index d3200d76b0837..b345527fe956c 100644 --- 
a/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt @@ -11,10 +11,13 @@ add_extra_unittest(ClangApplyReplacementsTests ApplyReplacementsTest.cpp ) -target_link_libraries(ClangApplyReplacementsTests +clang_target_link_libraries(ClangApplyReplacementsTests PRIVATE - clangApplyReplacements clangBasic clangToolingCore clangToolingRefactoring ) +target_link_libraries(ClangApplyReplacementsTests + PRIVATE + clangApplyReplacements + ) diff --git a/clang-tools-extra/unittests/clang-change-namespace/CMakeLists.txt b/clang-tools-extra/unittests/clang-change-namespace/CMakeLists.txt index 9c949723f5463..d66f85da44036 100644 --- a/clang-tools-extra/unittests/clang-change-namespace/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-change-namespace/CMakeLists.txt @@ -15,12 +15,11 @@ add_extra_unittest(ClangChangeNamespaceTests ChangeNamespaceTests.cpp ) -target_link_libraries(ClangChangeNamespaceTests +clang_target_link_libraries(ClangChangeNamespaceTests PRIVATE clangAST clangASTMatchers clangBasic - clangChangeNamespace clangFormat clangFrontend clangRewrite @@ -28,3 +27,7 @@ target_link_libraries(ClangChangeNamespaceTests clangTooling clangToolingCore ) +target_link_libraries(ClangChangeNamespaceTests + PRIVATE + clangChangeNamespace + ) diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 292a1d7a5f191..7934cd17cf927 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -20,12 +20,11 @@ add_extra_unittest(ClangDocTests YAMLGeneratorTest.cpp ) -target_link_libraries(ClangDocTests +clang_target_link_libraries(ClangDocTests PRIVATE clangAST clangASTMatchers clangBasic - clangDoc clangFormat clangFrontend clangRewrite @@ -33,3 +32,7 @@ target_link_libraries(ClangDocTests clangTooling clangToolingCore ) +target_link_libraries(ClangDocTests 
+ PRIVATE + clangDoc + ) diff --git a/clang-tools-extra/unittests/clang-include-fixer/CMakeLists.txt b/clang-tools-extra/unittests/clang-include-fixer/CMakeLists.txt index 997aa1459a31f..0c0954c2a7cd2 100644 --- a/clang-tools-extra/unittests/clang-include-fixer/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-include-fixer/CMakeLists.txt @@ -16,16 +16,19 @@ add_extra_unittest(ClangIncludeFixerTests FuzzySymbolIndexTests.cpp ) -target_link_libraries(ClangIncludeFixerTests +clang_target_link_libraries(ClangIncludeFixerTests PRIVATE clangBasic clangFormat clangFrontend - clangIncludeFixer clangRewrite clangSerialization clangTooling clangToolingCore + ) +target_link_libraries(ClangIncludeFixerTests + PRIVATE + clangIncludeFixer findAllSymbols ) diff --git a/clang-tools-extra/unittests/clang-include-fixer/find-all-symbols/CMakeLists.txt b/clang-tools-extra/unittests/clang-include-fixer/find-all-symbols/CMakeLists.txt index 427aa8ed86fb2..828d4347a0aee 100644 --- a/clang-tools-extra/unittests/clang-include-fixer/find-all-symbols/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-include-fixer/find-all-symbols/CMakeLists.txt @@ -12,7 +12,7 @@ add_extra_unittest(FindAllSymbolsTests FindAllSymbolsTests.cpp ) -target_link_libraries(FindAllSymbolsTests +clang_target_link_libraries(FindAllSymbolsTests PRIVATE clangAST clangASTMatchers @@ -21,5 +21,8 @@ target_link_libraries(FindAllSymbolsTests clangLex clangSerialization clangTooling + ) +target_link_libraries(FindAllSymbolsTests + PRIVATE findAllSymbols ) diff --git a/clang-tools-extra/unittests/clang-move/CMakeLists.txt b/clang-tools-extra/unittests/clang-move/CMakeLists.txt index 1d5347fe30ddc..468c65c36a434 100644 --- a/clang-tools-extra/unittests/clang-move/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-move/CMakeLists.txt @@ -15,16 +15,19 @@ add_extra_unittest(ClangMoveTests ClangMoveTests.cpp ) -target_link_libraries(ClangMoveTests +clang_target_link_libraries(ClangMoveTests PRIVATE clangAST 
clangASTMatchers clangBasic clangFormat clangFrontend - clangMove clangRewrite clangSerialization clangTooling clangToolingCore ) +target_link_libraries(ClangMoveTests + PRIVATE + clangMove + ) diff --git a/clang-tools-extra/unittests/clang-query/CMakeLists.txt b/clang-tools-extra/unittests/clang-query/CMakeLists.txt index 2177764c41a1d..975664259c136 100644 --- a/clang-tools-extra/unittests/clang-query/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-query/CMakeLists.txt @@ -11,14 +11,17 @@ add_extra_unittest(ClangQueryTests QueryParserTest.cpp ) -target_link_libraries(ClangQueryTests +clang_target_link_libraries(ClangQueryTests PRIVATE clangAST clangASTMatchers clangBasic clangDynamicASTMatchers clangFrontend - clangQuery clangSerialization clangTooling ) +target_link_libraries(ClangQueryTests + PRIVATE + clangQuery + ) diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index 93b49f546a0a8..287b431c2cfb0 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -21,7 +21,7 @@ add_extra_unittest(ClangTidyTests TransformerClangTidyCheckTest.cpp ) -target_link_libraries(ClangTidyTests +clang_target_link_libraries(ClangTidyTests PRIVATE clangAST clangASTMatchers @@ -29,6 +29,12 @@ target_link_libraries(ClangTidyTests clangFrontend clangLex clangSerialization + clangTooling + clangToolingCore + clangToolingRefactoring + ) +target_link_libraries(ClangTidyTests + PRIVATE clangTidy clangTidyAndroidModule clangTidyGoogleModule @@ -36,7 +42,4 @@ target_link_libraries(ClangTidyTests clangTidyObjCModule clangTidyReadabilityModule clangTidyUtils - clangTooling - clangToolingCore - clangToolingRefactoring ) From 4c7b8421856ee677b6a7ad4af844dcde647de960 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Fri, 4 Oct 2019 20:49:44 +0000 Subject: [PATCH 002/254] [lldb] [testsuite] Mark TestSBCommandReturnObject as failing on Windows 
Filed: testsuite: TestSBCommandReturnObject: object has no attribute 'dylibPath' https://bugs.llvm.org/show_bug.cgi?id=43570 http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/9530/steps/test/logs/stdio AttributeError: 'TestSBCommandReturnObject' object has no attribute 'dylibPath' Fix crash on SBCommandReturnObject & assignment https://reviews.llvm.org/D67589 env = {self.dylibPath: self.getLLDBLibraryEnvVal()} I do not know how to link with liblldb on Windows so marking it as failing on Windows. llvm-svn: 373787 --- .../api/command-return-object/TestSBCommandReturnObject.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/api/command-return-object/TestSBCommandReturnObject.py b/lldb/packages/Python/lldbsuite/test/api/command-return-object/TestSBCommandReturnObject.py index cf7dbe58daa63..c7aa2eced83d9 100644 --- a/lldb/packages/Python/lldbsuite/test/api/command-return-object/TestSBCommandReturnObject.py +++ b/lldb/packages/Python/lldbsuite/test/api/command-return-object/TestSBCommandReturnObject.py @@ -14,6 +14,9 @@ class TestSBCommandReturnObject(TestBase): NO_DEBUG_INFO_TESTCASE = True @skipIfNoSBHeaders + @expectedFailureAll( + oslist=["windows"], + bugnumber="llvm.org/pr43570") def test_sb_command_return_object(self): env = {self.dylibPath: self.getLLDBLibraryEnvVal()} From 6e312388b6f861067285bac45d90bd92b508608c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Oct 2019 20:54:14 +0000 Subject: [PATCH 003/254] [InstCombine] add tests for fneg disguised as fmul; NFC llvm-svn: 373788 --- llvm/test/Transforms/InstCombine/fmul.ll | 74 ++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index d448679804fe9..5ab6d93c834b8 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -991,3 +991,77 @@ define double @fmul_negated_constant_expression(double %x) { %r 
= fmul double %x, fsub (double -0.000000e+00, double bitcast (i64 ptrtoint (i8** getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @g, i64 0, inrange i32 0, i64 2) to i64) to double)) ret double %r } + +define float @negate_if_true(float %x, i1 %cond) { +; CHECK-LABEL: @negate_if_true( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 1.000000e+00 +; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[R]] +; + %sel = select i1 %cond, float -1.0, float 1.0 + %r = fmul float %sel, %x + ret float %r +} + +define float @negate_if_false(float %x, i1 %cond) { +; CHECK-LABEL: @negate_if_false( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float -1.000000e+00 +; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[R]] +; + %sel = select i1 %cond, float 1.0, float -1.0 + %r = fmul float %sel, %x + ret float %r +} + +define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { +; CHECK-LABEL: @negate_if_true_commute( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select i1 %cond, <2 x double> , <2 x double> + %r = fmul <2 x double> %x, %sel + ret <2 x double> %r +} + +define <2 x double> @negate_if_false_commute(<2 x double> %px, <2 x i1> %cond) { +; CHECK-LABEL: @negate_if_false_commute( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select <2 x i1> %cond, <2 x double> , <2 x double> + %r = fmul <2 
x double> %x, %sel + ret <2 x double> %r +} + +define float @negate_if_true_extra_use(float %x, i1 %cond) { +; CHECK-LABEL: @negate_if_true_extra_use( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 1.000000e+00 +; CHECK-NEXT: call void @use_f32(float [[SEL]]) +; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[R]] +; + %sel = select i1 %cond, float -1.0, float 1.0 + call void @use_f32(float %sel) + %r = fmul float %sel, %x + ret float %r +} + +define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { +; CHECK-LABEL: @negate_if_true_wrong_constant( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select i1 %cond, <2 x double> , <2 x double> + %r = fmul <2 x double> %x, %sel + ret <2 x double> %r +} From 984d08c680a79c72da1fbd55f7c4f8f672ef7f95 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Fri, 4 Oct 2019 21:01:52 +0000 Subject: [PATCH 004/254] Expand on the qfProcessInfo documentation, add examples from lldb-gdb-remote.txt and text explaining the no-criteria mode. llvm-svn: 373789 --- lldb/docs/lldb-platform-packets.txt | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/lldb/docs/lldb-platform-packets.txt b/lldb/docs/lldb-platform-packets.txt index 3258e4c0dd882..ed5d71701715f 100644 --- a/lldb/docs/lldb-platform-packets.txt +++ b/lldb/docs/lldb-platform-packets.txt @@ -181,10 +181,31 @@ incompatible with the flags that gdb specifies. // send: pid:3500;name:612e6f7574; // // The request packet has a criteria to search for, followed by -// a specific name. Other name_match: values include -// starts_with, ends_with, contains, regex. 
You can specify a pid -// to search for, a uid, all_users, triple, etc etc. The testsuite -// only ever searches for name_match:equals. +// a specific name. +// +// KEY VALUE DESCRIPTION +// =========== ======== ================================================ +// "name" ascii-hex An ASCII hex string that contains the name of +// the process that will be matched. +// "name_match" enum One of: "equals", "starts_with", "ends_with", +// "contains" or "regex" +// "pid" integer A string value containing the decimal process ID +// "parent_pid" integer A string value containing the decimal parent +// process ID +// "uid" integer A string value containing the decimal user ID +// "gid" integer A string value containing the decimal group ID +// "euid" integer A string value containing the decimal effective user ID +// "egid" integer A string value containing the decimal effective group ID +// "all_users" bool A boolean value that specifies if processes should +// be listed for all users, not just the user that the +// platform is running as +// "triple" ascii-hex An ASCII hex target triple string ("x86_64", +// "x86_64-apple-macosx", "armv7-apple-ios") +// +// If no criteria is given, qfProcessInfo will request a list of every process. +// +// The lldb testsuite currently only uses name_match:equals and the +// no-criteria mode to list every process. // // The response should include any information about the process that // can be retrieved in semicolon-separated name:value fields. @@ -195,6 +216,14 @@ incompatible with the flags that gdb specifies. // the search, qsProcessInfo should be sent. // // If no process match is found, Exx should be returned. 
+// +// Sample packet/response: +// send packet: $qfProcessInfo#00 +// read packet: $pid:60001;ppid:59948;uid:7746;gid:11;euid:7746;egid:11;name:6c6c6462;triple:7838365f36342d6170706c652d6d61636f7378;#00 +// send packet: $qsProcessInfo#00 +// read packet: $pid:59992;ppid:192;uid:7746;gid:11;euid:7746;egid:11;name:6d64776f726b6572;triple:7838365f36342d6170706c652d6d61636f7378;#00 +// send packet: $qsProcessInfo#00 +// read packet: $E04#00 //---------------------------------------------------------------------- // qsProcessInfo From 784892c9641a65e9acbcb442e26b00022240f4ee Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Fri, 4 Oct 2019 21:24:12 +0000 Subject: [PATCH 005/254] [MachineOutliner] Disable outlining from noreturn functions Outlining from noreturn functions doesn't do the correct thing right now. The outliner should respect that the caller is marked noreturn. In the event that we have a noreturn function, and the outlined code is in tail position, the outliner will not see that the outlined function should be tail called. As a result, you end up with a regular call containing a return. Fixing this requires that we check that all candidates live inside noreturn functions. So, for the sake of correctness, don't outline from noreturn functions right now. Add machine-outliner-noreturn.mir to test this. llvm-svn: 373791 --- llvm/lib/CodeGen/MachineOutliner.cpp | 6 ++ .../AArch64/machine-outliner-noreturn.mir | 56 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-noreturn.mir diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 533cce57adc84..60eeefba9d6fe 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; + // Disable outlining from noreturn functions right now. 
Noreturn requires + // special handling for the case where what we are outlining could be a + // tail call. + if (F.hasFnAttribute(Attribute::NoReturn)) + continue; + // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-noreturn.mir b/llvm/test/CodeGen/AArch64/machine-outliner-noreturn.mir new file mode 100644 index 0000000000000..29166f9f12d82 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-noreturn.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @foo() #0 { ret void } + define void @bar(i32 %a) #0 { ret void } + define void @baz(i32 %a) #0 { ret void } + attributes #0 = { noredzone noreturn } +... +--- + +# Temporarily disable outlining from noreturn functions. To do this, we need +# to verify thst every function we want to outline from is noreturn. + +# CHECK-NOT: OUTLINED_FUNCTION + +name: foo +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + $w3 = ORRWri $wzr, 1 + $w4 = ORRWri $wzr, 1 + BRK 1 +... +--- +name: bar +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + $w3 = ORRWri $wzr, 1 + $w4 = ORRWri $wzr, 1 + BRK 1 +... +--- +name: baz +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + $w3 = ORRWri $wzr, 1 + $w4 = ORRWri $wzr, 1 + BRK 1 +... 
From 442ddffe138a79ef07ee86863a429e333c6168cf Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Fri, 4 Oct 2019 21:37:20 +0000 Subject: [PATCH 006/254] [clang] fix a typo from r372531 Reviewers: xbolva00 Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68482 llvm-svn: 373792 --- clang/lib/Sema/SemaChecking.cpp | 2 +- clang/test/Sema/warn-integer-constants-in-ternary.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 08d124184a98f..e65ad94d2daeb 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11384,7 +11384,7 @@ static void DiagnoseIntInBoolContext(Sema &S, Expr *E) { (RHS->getValue() == 0 || RHS->getValue() == 1)) // Do not diagnose common idioms. return; - if (LHS->getValue() != 0 && LHS->getValue() != 0) + if (LHS->getValue() != 0 && RHS->getValue() != 0) S.Diag(ExprLoc, diag::warn_integer_constants_in_conditional_always_true); } } diff --git a/clang/test/Sema/warn-integer-constants-in-ternary.c b/clang/test/Sema/warn-integer-constants-in-ternary.c index 287b91ecdb723..95c78d272794e 100644 --- a/clang/test/Sema/warn-integer-constants-in-ternary.c +++ b/clang/test/Sema/warn-integer-constants-in-ternary.c @@ -18,7 +18,7 @@ void test(boolean a) { boolean r; r = a ? (1) : TWO; r = a ? 3 : TWO; // expected-warning {{converting the result of '?:' with integer constants to a boolean always evaluates to 'true'}} - r = a ? -2 : 0; // expected-warning {{converting the result of '?:' with integer constants to a boolean always evaluates to 'true'}} + r = a ? -2 : 0; r = a ? 3 : -2; // expected-warning {{converting the result of '?:' with integer constants to a boolean always evaluates to 'true'}} r = a ? 0 : TWO; r = a ? 
3 : ONE; // expected-warning {{converting the result of '?:' with integer constants to a boolean always evaluates to 'true'}} From 4f75a73796fff940fd30d38e3185c9e733ddbf2c Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Fri, 4 Oct 2019 21:39:22 +0000 Subject: [PATCH 007/254] Use named constant to indicate all lanes, to handle 32 and 64 wide architectures Summary: Use named constant to indicate all lanes, to handle 32 and 64 wide architectures Reviewers: ABataev, jdoerfert, grokos, ronlieb Reviewed By: grokos Subscribers: ronlieb, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D68369 llvm-svn: 373793 --- openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu | 4 ++-- .../libomptarget/deviceRTLs/nvptx/src/reduction.cu | 12 ++++++------ .../libomptarget/deviceRTLs/nvptx/src/target_impl.h | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 5db443c3b331d..24a235df52569 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -320,7 +320,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, // can be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; IncParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u); } return isActive; @@ -347,7 +347,7 @@ EXTERN void __kmpc_kernel_end_parallel() { // be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; DecParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? 
__kmpc_impl_all_lanes : 1u); } //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index 347c5568bb876..cee3e5d6dd3be 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -24,14 +24,14 @@ EXTERN void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {} EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) { - return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size); + return __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, val, delta, size); } EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { uint32_t lo, hi; __kmpc_impl_unpack(val, lo, hi); - hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size); - lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size); + hi = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, hi, delta, size); + lo = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, lo, delta, size); return __kmpc_impl_pack(lo, hi); } @@ -82,7 +82,7 @@ int32_t __kmpc_nvptx_simd_reduce_nowait(int32_t global_tid, int32_t num_vars, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct) { __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) { + if (Liveness == __kmpc_impl_all_lanes) { gpu_regular_warp_reduce(reduce_data, shflFct); return GetThreadIdInBlock() % WARPSIZE == 0; // Result on lane 0 of the simd warp. 
@@ -143,7 +143,7 @@ static int32_t nvptx_parallel_reduce_nowait( return BlockThreadId == 0; #else __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, @@ -318,7 +318,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, // Reduce across warps to the warp master. __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 80081431ed902..37a125d5e6f0d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -27,6 +27,8 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { } typedef uint32_t __kmpc_impl_lanemask_t; +static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes = + UINT32_C(0xffffffff); INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { __kmpc_impl_lanemask_t res; From 68eefbb0643d2972d917ef854dc021a373176603 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 4 Oct 2019 21:40:20 +0000 Subject: [PATCH 008/254] [lit] Use better name for "test in parallel" concept In the past, lit used threads to run tests in parallel. Today we use `multiprocessing.Pool`, which uses processes. Let's stay more abstract and use "worker" everywhere. 
Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D68475 llvm-svn: 373794 --- llvm/utils/lit/lit/main.py | 23 +++++++++++----------- llvm/utils/lit/lit/run.py | 20 +++++++++---------- llvm/utils/lit/tests/discovery.py | 2 +- llvm/utils/lit/tests/parallelism-groups.py | 2 +- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 49aaf638a2213..fc9c0b406c64e 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -209,8 +209,8 @@ def main_with_tmp(builtinParameters): parser.add_argument("--version", dest="show_version", help="Show version and exit", action="store_true", default=False) - parser.add_argument("-j", "--threads", dest="numThreads", metavar="N", - help="Number of testing threads", + parser.add_argument("-j", "--threads", "--workers", dest="numWorkers", metavar="N", + help="Number of workers used for testing", type=int, default=None) parser.add_argument("--config-prefix", dest="configPrefix", metavar="NAME", help="Prefix for 'lit' config files", @@ -334,10 +334,10 @@ def main_with_tmp(builtinParameters): if not args: parser.error('No inputs specified') - if opts.numThreads is None: - opts.numThreads = lit.util.detectCPUs() - elif opts.numThreads <= 0: - parser.error("Option '--threads' or '-j' requires positive integer") + if opts.numWorkers is None: + opts.numWorkers = lit.util.detectCPUs() + elif opts.numWorkers <= 0: + parser.error("Option '--workers' or '-j' requires positive integer") if opts.maxFailures is not None and opts.maxFailures <= 0: parser.error("Option '--max-failures' requires positive integer") @@ -480,8 +480,8 @@ def main_with_tmp(builtinParameters): if opts.maxTests is not None: run.tests = run.tests[:opts.maxTests] - # Don't create more threads than tests. - opts.numThreads = min(len(run.tests), opts.numThreads) + # Don't create more workers than tests. 
+ opts.numWorkers = min(len(run.tests), opts.numWorkers) # Because some tests use threads internally, and at least on Linux each # of these threads counts toward the current process limit, try to @@ -489,7 +489,7 @@ def main_with_tmp(builtinParameters): # resource exhaustion. try: cpus = lit.util.detectCPUs() - desired_limit = opts.numThreads * cpus * 2 # the 2 is a safety factor + desired_limit = opts.numWorkers * cpus * 2 # the 2 is a safety factor # Import the resource module here inside this try block because it # will likely fail on Windows. @@ -506,8 +506,7 @@ def main_with_tmp(builtinParameters): pass extra = (' of %d' % numTotalTests) if (len(run.tests) != numTotalTests) else '' - threads = 'single process' if (opts.numThreads == 1) else ('%d threads' % opts.numThreads) - header = '-- Testing: %d%s tests, %s --' % (len(run.tests), extra, threads) + header = '-- Testing: %d%s tests, %d workers --' % (len(run.tests), extra, opts.numWorkers) progressBar = None if not opts.quiet: if opts.succinct and opts.useProgressBar: @@ -523,7 +522,7 @@ def main_with_tmp(builtinParameters): startTime = time.time() display = TestingProgressDisplay(opts, len(run.tests), progressBar) try: - run.execute_tests(display, opts.numThreads, opts.maxTime) + run.execute_tests(display, opts.numWorkers, opts.maxTime) except KeyboardInterrupt: sys.exit(2) display.finish() diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index 18e754addd328..dbd0822b695e2 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -37,7 +37,7 @@ def __init__(self, lit_config, tests): multiprocessing.BoundedSemaphore(v) for k, v in lit_config.parallelism_groups.items()} - def execute_tests_in_pool(self, jobs, max_time): + def _execute_tests_in_pool(self, workers, max_time): # We need to issue many wait calls, so compute the final deadline and # subtract time.time() from that as we go along. 
deadline = None @@ -49,7 +49,7 @@ def execute_tests_in_pool(self, jobs, max_time): # interrupts the workers before we make it into our task callback, they # will each raise a KeyboardInterrupt exception and print to stderr at # the same time. - pool = multiprocessing.Pool(jobs, lit.worker.initializer, + pool = multiprocessing.Pool(workers, lit.worker.initializer, (self.lit_config, self.parallelism_semaphores)) @@ -93,11 +93,11 @@ def console_ctrl_handler(type): finally: pool.join() - def execute_tests(self, display, jobs, max_time=None): + def execute_tests(self, display, workers, max_time=None): """ - execute_tests(display, jobs, [max_time]) + execute_tests(display, workers, [max_time]) - Execute each of the tests in the run, using up to jobs number of + Execute the tests in the run using up to the specified number of parallel tasks, and inform the display of each individual result. The provided tests should be a subset of the tests available in this run object. @@ -105,10 +105,8 @@ def execute_tests(self, display, jobs, max_time=None): If max_time is non-None, it should be a time in seconds after which to stop executing tests. - The display object will have its update method called with each test as - it is completed. The calls are guaranteed to be locked with respect to - one another, but are *not* guaranteed to be called on the same thread as - this method was invoked on. + The display object will have its update method called for each completed + test. Upon completion, each test in the run will have its result computed. 
Tests which were not actually executed (for any reason) will @@ -124,14 +122,14 @@ def execute_tests(self, display, jobs, max_time=None): self.failure_count = 0 self.hit_max_failures = False - if jobs == 1: + if workers == 1: for test_index, test in enumerate(self.tests): lit.worker._execute_test(test, self.lit_config) self.consume_test_result((test_index, test)) if self.hit_max_failures: break else: - self.execute_tests_in_pool(jobs, max_time) + self._execute_tests_in_pool(workers, max_time) # Mark any tests that weren't run as UNRESOLVED. for test in self.tests: diff --git a/llvm/utils/lit/tests/discovery.py b/llvm/utils/lit/tests/discovery.py index 9f09470c48c57..b15468f10159f 100644 --- a/llvm/utils/lit/tests/discovery.py +++ b/llvm/utils/lit/tests/discovery.py @@ -29,7 +29,7 @@ # RUN: %{python} %{inputs}/config-map-discovery/driver.py \ # RUN: %{inputs}/config-map-discovery/main-config/lit.cfg \ # RUN: %{inputs}/config-map-discovery/lit.alt.cfg \ -# RUN: --threads=1 --debug --show-tests --show-suites > %t.out 2> %t.err +# RUN: --workers=1 --debug --show-tests --show-suites > %t.out 2> %t.err # RUN: FileCheck --check-prefix=CHECK-CONFIG-MAP-OUT < %t.out %s # RUN: FileCheck --check-prefix=CHECK-CONFIG-MAP-ERR < %t.err %s diff --git a/llvm/utils/lit/tests/parallelism-groups.py b/llvm/utils/lit/tests/parallelism-groups.py index c6427bee12491..d80f2318fe05f 100644 --- a/llvm/utils/lit/tests/parallelism-groups.py +++ b/llvm/utils/lit/tests/parallelism-groups.py @@ -15,7 +15,7 @@ # RUN: %{lit} -j2 %{inputs}/parallelism-groups | FileCheck %s -# CHECK: -- Testing: 2 tests, 2 threads -- +# CHECK: -- Testing: 2 tests, 2 workers -- # CHECK-DAG: PASS: parallelism-groups :: test1.txt # CHECK-DAG: PASS: parallelism-groups :: test2.txt # CHECK: Expected Passes : 2 From 68d01d9429019f7c62f3555a503f4ac04c466ab6 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Fri, 4 Oct 2019 21:51:35 +0000 Subject: [PATCH 009/254] [debugserver] Don't link against Cocoa, it's not needed. 
llvm-svn: 373795 --- lldb/tools/debugserver/source/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index dbdea0565b85b..9c5922155872e 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -107,8 +107,6 @@ if(APPLE) if(NOT BACKBOARD_LIBRARY) set(SKIP_TEST_DEBUGSERVER ON CACHE BOOL "" FORCE) endif() - else() - find_library(COCOA_LIBRARY Cocoa) endif() endif() From b3e3934347f284e6358c64db8a1acdfe8785b9c7 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Fri, 4 Oct 2019 22:15:28 +0000 Subject: [PATCH 010/254] Add an off-by-default option to enable testing for gdb pretty printers. Summary: The current version of the pretty printers are not python3 compatible, so turn them off by default until sufficiently improved. Reviewers: MaskRay, tamur Subscribers: mgorny, christof, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68477 llvm-svn: 373796 --- libcxx/CMakeLists.txt | 1 + libcxx/test/CMakeLists.txt | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index d1a0658176a76..335e7bc61a269 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -81,6 +81,7 @@ option(LIBCXX_ENABLE_FILESYSTEM "Build filesystem as part of the main libc++ lib ${ENABLE_FILESYSTEM_DEFAULT}) option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF) +option(LIBCXX_TEST_GDB_PRETTY_PRINTERS "Test gdb pretty printers." 
OFF) # Benchmark options ----------------------------------------------------------- option(LIBCXX_INCLUDE_BENCHMARKS "Build the libc++ benchmarks and their dependencies" ON) diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index e6ef6284ecabf..81474511b58c5 100644 --- a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -60,12 +60,16 @@ if (NOT DEFINED LIBCXX_TEST_DEPS) message(FATAL_ERROR "Expected LIBCXX_TEST_DEPS to be defined") endif() -find_program(LIBCXX_GDB gdb) -if (LIBCXX_GDB) - set(LIBCXX_GDB "${LIBCXX_GDB}") - message(STATUS "gdb found: ${LIBCXX_GDB}") -else() - message(STATUS "gdb not found. Disabling dependent tests.") +# Turn this on by default when the pretty printers are python3 +# compatible. +if(LIBCXX_TEST_GDB_PRETTY_PRINTERS) + find_program(LIBCXX_GDB gdb) + if (LIBCXX_GDB) + set(LIBCXX_GDB "${LIBCXX_GDB}") + message(STATUS "gdb found: ${LIBCXX_GDB}") + else() + message(STATUS "gdb not found. Disabling dependent tests.") + endif() endif() if (LIBCXX_INCLUDE_TESTS) From 6a954748c8bf75525cb688467329d6c84a65ed0b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:15:32 +0000 Subject: [PATCH 011/254] [NFC][InstCombine] Tests for right-shift shift amount reassociation (w/ trunc) (PR43564, PR42391) https://rise4fun.com/Alive/GEw llvm-svn: 373797 --- ...ount-reassociation-with-truncation-ashr.ll | 201 ++++++++++++++++++ ...ount-reassociation-with-truncation-lshr.ll | 201 ++++++++++++++++++ ...mount-reassociation-with-truncation-shl.ll | 34 --- 3 files changed, 402 insertions(+), 34 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll create mode 100644 llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll new file mode 100644 index 
0000000000000..d9571f250ccfa --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -instcombine -S | FileCheck %s + +; Given pattern: +; (trunc (iSrc x a>> Q) to iDst) a>> K +; we should rewrite it as +; (trunc (iSrc x a>> (Q+K)) to iDst) +; iff (Q+K) is bitwidth(iSrc)-1 +; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/`nsw` FLAGS! + +; Basic scalar test + +define i16 @t0(i32 %x, i16 %y) { +; CHECK-LABEL: @t0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + %t5 = ashr i16 %t3, %t4 + ret i16 %t5 +} + +; Basic vector tests + +define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { +; CHECK-LABEL: @t1_vec_splat( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = ashr <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = ashr <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t0 = sub <2 x i16> , %y + %t1 = zext <2 x i16> %t0 to <2 x i32> + %t2 = ashr <2 x i32> %x, %t1 + %t3 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = add <2 x i16> %y, + %t5 = ashr <2 x i16> %t3, %t4 + ret <2 x i16> %t5 +} + +define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_undef0( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = 
zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = ashr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = ashr <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = ashr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = ashr <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = ashr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = ashr <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +; One-use tests + 
+declare void @use16(i16) +declare void @use32(i32) + +define i16 @t6_extrause0(i32 %x, i16 %y) { +; CHECK-LABEL: @t6_extrause0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + call void @use16(i16 %t3) + %t5 = ashr i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t7_extrause1(i32 %x, i16 %y) { +; CHECK-LABEL: @t7_extrause1( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + call void @use16(i16 %t4) + %t5 = ashr i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t8_extrause2(i32 %x, i16 %y) { +; CHECK-LABEL: @t8_extrause2( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: call void @use16(i16 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, 
-1 + call void @use16(i16 %t3) + call void @use16(i16 %t4) + %t5 = ashr i16 %t3, %t4 + ret i16 %t5 +} + +; No 'nuw'/'nsw' flags are to be propagated! +; But we can't test that, such IR does not reach that code. + +; Negative tests + +; Can only fold if we are extracting the sign bit. +define i16 @t9_ashr(i32 %x, i16 %y) { +; CHECK-LABEL: @t9_ashr( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = ashr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = ashr i16 %t3, %t4 + ret i16 %t3 +} diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll new file mode 100644 index 0000000000000..3eae56d52d5dd --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -instcombine -S | FileCheck %s + +; Given pattern: +; (trunc (iSrc x l>> Q) to iDst) l>> K +; we should rewrite it as +; (trunc (iSrc x l>> (Q+K)) to iDst) +; iff (Q+K) is bitwidth(iSrc)-1 +; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/`nsw` FLAGS! 
+ +; Basic scalar test + +define i16 @t0(i32 %x, i16 %y) { +; CHECK-LABEL: @t0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + %t5 = lshr i16 %t3, %t4 + ret i16 %t5 +} + +; Basic vector tests + +define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { +; CHECK-LABEL: @t1_vec_splat( +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = lshr <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t0 = sub <2 x i16> , %y + %t1 = zext <2 x i16> %t0 to <2 x i32> + %t2 = lshr <2 x i32> %x, %t1 + %t3 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = add <2 x i16> %y, + %t5 = lshr <2 x i16> %t3, %t4 + ret <2 x i16> %t5 +} + +define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_undef0( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = lshr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = lshr 
<3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = lshr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = lshr <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_undef1( +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: ret <3 x i16> [[T5]] +; + %t0 = sub <3 x i16> , %y + %t1 = zext <3 x i16> %t0 to <3 x i32> + %t2 = lshr <3 x i32> %x, %t1 + %t3 = trunc <3 x i32> %t2 to <3 x i16> + %t4 = add <3 x i16> %y, + %t5 = lshr <3 x i16> %t3, %t4 + ret <3 x i16> %t5 +} + +; One-use tests + +declare void @use16(i16) +declare void @use32(i32) + +define i16 @t6_extrause0(i32 %x, i16 %y) { +; CHECK-LABEL: @t6_extrause0( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; 
CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + call void @use16(i16 %t3) + %t5 = lshr i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t7_extrause1(i32 %x, i16 %y) { +; CHECK-LABEL: @t7_extrause1( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + call void @use16(i16 %t4) + %t5 = lshr i16 %t3, %t4 + ret i16 %t5 +} + +define i16 @t8_extrause2(i32 %x, i16 %y) { +; CHECK-LABEL: @t8_extrause2( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: call void @use16(i16 [[T3]]) +; CHECK-NEXT: call void @use16(i16 [[T4]]) +; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: ret i16 [[T5]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -1 + call void @use16(i16 %t3) + call void @use16(i16 %t4) + %t5 = lshr i16 %t3, %t4 + ret i16 %t5 +} + +; No 'nuw'/'nsw' flags are to be propagated! +; But we can't test that, such IR does not reach that code. + +; Negative tests + +; Can only fold if we are extracting the sign bit. 
+define i16 @t9_lshr(i32 %x, i16 %y) { +; CHECK-LABEL: @t9_lshr( +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: ret i16 [[T3]] +; + %t0 = sub i16 32, %y + %t1 = zext i16 %t0 to i32 + %t2 = lshr i32 %x, %t1 + %t3 = trunc i32 %t2 to i16 + %t4 = add i16 %y, -2 + %t5 = lshr i16 %t3, %t4 + ret i16 %t3 +} diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 6675785929c04..2328ec7965e7f 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -174,40 +174,6 @@ define i16 @t8_extrause2(i32 %x, i16 %y) { ; Negative tests -; No folding possible for right-shifts.. -define i16 @t9_shl(i32 %x, i16 %y) { -; CHECK-LABEL: @t9_shl( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: ret i16 [[T3]] -; - %t0 = sub i16 32, %y - %t1 = zext i16 %t0 to i32 - %t2 = lshr i32 %x, %t1 - %t3 = trunc i32 %t2 to i16 - %t4 = add i16 %y, -24 - %t5 = lshr i16 %t3, %t4 - ret i16 %t3 -} -define i16 @t10_ashr(i32 %x, i16 %y) { -; CHECK-LABEL: @t10_ashr( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: ret i16 [[T3]] -; - %t0 = sub i16 32, %y - %t1 = zext i16 %t0 to i32 - %t2 = ashr i32 %x, %t1 - %t3 = trunc i32 %t2 to i16 - %t4 = add i16 %y, -24 - %t5 = ashr i16 %t3, %t4 - ret i16 %t3 -} - ; Can't fold, total shift would be 32 
define i16 @n11(i32 %x, i16 %y) { ; CHECK-LABEL: @n11( From 3c56cc920fb3cb73528bdc0a65895e08a915150a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:15:41 +0000 Subject: [PATCH 012/254] [NFC][InstCombine] Tests for bit test via highest sign-bit extract (w/ trunc) (PR43564) https://rise4fun.com/Alive/x5IS llvm-svn: 373798 --- ...-test-via-right-shifting-all-other-bits.ll | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll diff --git a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll new file mode 100644 index 0000000000000..da744ebbbeead --- /dev/null +++ b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i1 @highest_bit_test_via_lshr(i32 %data, i32 %nbits) { +; CHECK-LABEL: @highest_bit_test_via_lshr( +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[DATA:%.*]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %num_low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %num_low_bits_to_skip + %skip_all_bits_till_signbit = sub i32 %nbits, 1 + %signbit = lshr i32 %high_bits_extracted, %skip_all_bits_till_signbit + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} + +define i1 @highest_bit_test_via_lshr_with_truncation(i64 %data, i32 %nbits) { +; CHECK-LABEL: @highest_bit_test_via_lshr_with_truncation( +; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 
[[HIGH_BITS_EXTRACTED]] to i32 +; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1 +; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %num_low_bits_to_skip = sub i32 64, %nbits + %num_low_bits_to_skip_wide = zext i32 %num_low_bits_to_skip to i64 + %high_bits_extracted = lshr i64 %data, %num_low_bits_to_skip_wide + %high_bits_extracted_narrow = trunc i64 %high_bits_extracted to i32 + %skip_all_bits_till_signbit = sub i32 %nbits, 1 + %signbit = lshr i32 %high_bits_extracted_narrow, %skip_all_bits_till_signbit + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} + +define i1 @highest_bit_test_via_ashr(i32 %data, i32 %nbits) { +; CHECK-LABEL: @highest_bit_test_via_ashr( +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[DATA:%.*]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %num_low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = ashr i32 %data, %num_low_bits_to_skip + %skip_all_bits_till_signbit = sub i32 %nbits, 1 + %signbit = ashr i32 %high_bits_extracted, %skip_all_bits_till_signbit + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} + +define i1 @highest_bit_test_via_ashr_with_truncation(i64 %data, i32 %nbits) { +; CHECK-LABEL: @highest_bit_test_via_ashr_with_truncation( +; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = ashr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED]] to i32 +; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1 +; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + 
%num_low_bits_to_skip = sub i32 64, %nbits + %num_low_bits_to_skip_wide = zext i32 %num_low_bits_to_skip to i64 + %high_bits_extracted = ashr i64 %data, %num_low_bits_to_skip_wide + %high_bits_extracted_narrow = trunc i64 %high_bits_extracted to i32 + %skip_all_bits_till_signbit = sub i32 %nbits, 1 + %signbit = ashr i32 %high_bits_extracted_narrow, %skip_all_bits_till_signbit + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} + +declare void @use32(i32) +declare void @use64(i64) + +define i1 @unsigned_sign_bit_extract(i32 %x) { +; CHECK-LABEL: @unsigned_sign_bit_extract( +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signbit = lshr i32 %x, 31 + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} +define i1 @unsigned_sign_bit_extract_extrause(i32 %x) { +; CHECK-LABEL: @unsigned_sign_bit_extract_extrause( +; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[X:%.*]], 31 +; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signbit = lshr i32 %x, 31 + call void @use32(i32 %signbit) + %isneg = icmp ne i32 %signbit, 0 + ret i1 %isneg +} +define i1 @unsigned_sign_bit_extract_extrause__ispositive(i32 %x) { +; CHECK-LABEL: @unsigned_sign_bit_extract_extrause__ispositive( +; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[X:%.*]], 31 +; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) +; CHECK-NEXT: [[ISNEG:%.*]] = icmp eq i32 [[SIGNBIT]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signbit = lshr i32 %x, 31 + call void @use32(i32 %signbit) + %isneg = icmp eq i32 %signbit, 0 + ret i1 %isneg +} +define i1 @signed_sign_bit_extract(i32 %x) { +; CHECK-LABEL: @signed_sign_bit_extract( +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signsmear = ashr i32 %x, 31 + %isneg = icmp ne i32 %signsmear, 0 + ret i1 %isneg +} +define i1 @signed_sign_bit_extract_extrause(i32 %x) { +; CHECK-LABEL: 
@signed_sign_bit_extract_extrause( +; CHECK-NEXT: [[SIGNSMEAR:%.*]] = ashr i32 [[X:%.*]], 31 +; CHECK-NEXT: call void @use32(i32 [[SIGNSMEAR]]) +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signsmear = ashr i32 %x, 31 + call void @use32(i32 %signsmear) + %isneg = icmp ne i32 %signsmear, 0 + ret i1 %isneg +} +define i1 @unsigned_sign_bit_extract_with_trunc(i64 %x) { +; CHECK-LABEL: @unsigned_sign_bit_extract_with_trunc( +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X:%.*]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signbit = lshr i64 %x, 63 + %signbit_narrow = trunc i64 %signbit to i32 + %isneg = icmp ne i32 %signbit_narrow, 0 + ret i1 %isneg +} +define i1 @unsigned_sign_bit_extract_with_trunc_extrause(i64 %x) { +; CHECK-LABEL: @unsigned_sign_bit_extract_with_trunc_extrause( +; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i64 [[X:%.*]], 63 +; CHECK-NEXT: call void @use64(i64 [[SIGNBIT]]) +; CHECK-NEXT: [[SIGNBIT_NARROW:%.*]] = trunc i64 [[SIGNBIT]] to i32 +; CHECK-NEXT: call void @use32(i32 [[SIGNBIT_NARROW]]) +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT_NARROW]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signbit = lshr i64 %x, 63 + call void @use64(i64 %signbit) + %signbit_narrow = trunc i64 %signbit to i32 + call void @use32(i32 %signbit_narrow) + %isneg = icmp ne i32 %signbit_narrow, 0 + ret i1 %isneg +} +define i1 @signed_sign_bit_extract_trunc(i64 %x) { +; CHECK-LABEL: @signed_sign_bit_extract_trunc( +; CHECK-NEXT: [[SIGNSMEAR:%.*]] = ashr i64 [[X:%.*]], 63 +; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc i64 [[SIGNSMEAR]] to i32 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR_NARROW]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signsmear = ashr i64 %x, 63 + %signsmear_narrow = trunc i64 %signsmear to i32 + %isneg = icmp ne i32 %signsmear_narrow, 0 + ret i1 %isneg +} +define i1 @signed_sign_bit_extract_trunc_extrause(i64 %x) { +; CHECK-LABEL: @signed_sign_bit_extract_trunc_extrause( +; CHECK-NEXT: 
[[SIGNSMEAR:%.*]] = ashr i64 [[X:%.*]], 63 +; CHECK-NEXT: call void @use64(i64 [[SIGNSMEAR]]) +; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc i64 [[SIGNSMEAR]] to i32 +; CHECK-NEXT: call void @use32(i32 [[SIGNSMEAR_NARROW]]) +; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR_NARROW]], 0 +; CHECK-NEXT: ret i1 [[ISNEG]] +; + %signsmear = ashr i64 %x, 63 + call void @use64(i64 %signsmear) + %signsmear_narrow = trunc i64 %signsmear to i32 + call void @use32(i32 %signsmear_narrow) + %isneg = icmp ne i32 %signsmear_narrow, 0 + ret i1 %isneg +} From 007452532b3235ce2faf4e6cbcf2b04b39945b20 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:15:49 +0000 Subject: [PATCH 013/254] [NFC][InstCombine] Autogenerate icmp-shr-lt-gt.ll test llvm-svn: 373799 --- .../Transforms/InstCombine/icmp-shr-lt-gt.ll | 177 +++++++++--------- 1 file changed, 89 insertions(+), 88 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll index bf1a031a41249..c9a803eb8ea5e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll @@ -1,8 +1,9 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define i1 @lshrugt_01_00(i4 %x) { ; CHECK-LABEL: @lshrugt_01_00( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 1 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -12,7 +13,7 @@ define i1 @lshrugt_01_00(i4 %x) { define i1 @lshrugt_01_01(i4 %x) { ; CHECK-LABEL: @lshrugt_01_01( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 3 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -22,7 +23,7 @@ define i1 @lshrugt_01_01(i4 %x) { define i1 @lshrugt_01_02(i4 %x) { ; CHECK-LABEL: @lshrugt_01_02( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 5 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 5 ; 
CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -32,7 +33,7 @@ define i1 @lshrugt_01_02(i4 %x) { define i1 @lshrugt_01_03(i4 %x) { ; CHECK-LABEL: @lshrugt_01_03( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -42,7 +43,7 @@ define i1 @lshrugt_01_03(i4 %x) { define i1 @lshrugt_01_04(i4 %x) { ; CHECK-LABEL: @lshrugt_01_04( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -7 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -7 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -52,7 +53,7 @@ define i1 @lshrugt_01_04(i4 %x) { define i1 @lshrugt_01_05(i4 %x) { ; CHECK-LABEL: @lshrugt_01_05( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -5 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -62,7 +63,7 @@ define i1 @lshrugt_01_05(i4 %x) { define i1 @lshrugt_01_06(i4 %x) { ; CHECK-LABEL: @lshrugt_01_06( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -3 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -3 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -153,7 +154,7 @@ define i1 @lshrugt_01_15(i4 %x) { define i1 @lshrugt_02_00(i4 %x) { ; CHECK-LABEL: @lshrugt_02_00( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 3 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -163,7 +164,7 @@ define i1 @lshrugt_02_00(i4 %x) { define i1 @lshrugt_02_01(i4 %x) { ; CHECK-LABEL: @lshrugt_02_01( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -173,7 +174,7 @@ define i1 @lshrugt_02_01(i4 %x) { define i1 @lshrugt_02_02(i4 %x) { ; CHECK-LABEL: @lshrugt_02_02( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -5 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -300,7 +301,7 @@ define i1 @lshrugt_02_15(i4 %x) { define i1 @lshrugt_03_00(i4 %x) { ; CHECK-LABEL: 
@lshrugt_03_00( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 3 @@ -454,7 +455,7 @@ define i1 @lshrult_01_00(i4 %x) { define i1 @lshrult_01_01(i4 %x) { ; CHECK-LABEL: @lshrult_01_01( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 2 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -464,7 +465,7 @@ define i1 @lshrult_01_01(i4 %x) { define i1 @lshrult_01_02(i4 %x) { ; CHECK-LABEL: @lshrult_01_02( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -474,7 +475,7 @@ define i1 @lshrult_01_02(i4 %x) { define i1 @lshrult_01_03(i4 %x) { ; CHECK-LABEL: @lshrult_01_03( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 6 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -484,7 +485,7 @@ define i1 @lshrult_01_03(i4 %x) { define i1 @lshrult_01_04(i4 %x) { ; CHECK-LABEL: @lshrult_01_04( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -494,7 +495,7 @@ define i1 @lshrult_01_04(i4 %x) { define i1 @lshrult_01_05(i4 %x) { ; CHECK-LABEL: @lshrult_01_05( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -504,7 +505,7 @@ define i1 @lshrult_01_05(i4 %x) { define i1 @lshrult_01_06(i4 %x) { ; CHECK-LABEL: @lshrult_01_06( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 1 @@ -514,7 +515,7 @@ define i1 @lshrult_01_06(i4 %x) { define i1 @lshrult_01_07(i4 %x) { ; CHECK-LABEL: @lshrult_01_07( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -2 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -2 ; CHECK-NEXT: ret i1 [[C]] ; 
%s = lshr i4 %x, 1 @@ -605,7 +606,7 @@ define i1 @lshrult_02_00(i4 %x) { define i1 @lshrult_02_01(i4 %x) { ; CHECK-LABEL: @lshrult_02_01( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -615,7 +616,7 @@ define i1 @lshrult_02_01(i4 %x) { define i1 @lshrult_02_02(i4 %x) { ; CHECK-LABEL: @lshrult_02_02( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -625,7 +626,7 @@ define i1 @lshrult_02_02(i4 %x) { define i1 @lshrult_02_03(i4 %x) { ; CHECK-LABEL: @lshrult_02_03( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 2 @@ -752,7 +753,7 @@ define i1 @lshrult_03_00(i4 %x) { define i1 @lshrult_03_01(i4 %x) { ; CHECK-LABEL: @lshrult_03_01( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr i4 %x, 3 @@ -888,7 +889,7 @@ define i1 @lshrult_03_15(i4 %x) { define i1 @ashrsgt_01_00(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_00( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -898,7 +899,7 @@ define i1 @ashrsgt_01_00(i4 %x) { define i1 @ashrsgt_01_01(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_01( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -908,7 +909,7 @@ define i1 @ashrsgt_01_01(i4 %x) { define i1 @ashrsgt_01_02(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_02( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 5 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -999,7 +1000,7 @@ define i1 @ashrsgt_01_11(i4 %x) { define i1 @ashrsgt_01_12(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_12( -; 
CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -7 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -7 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1009,7 +1010,7 @@ define i1 @ashrsgt_01_12(i4 %x) { define i1 @ashrsgt_01_13(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_13( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1019,7 +1020,7 @@ define i1 @ashrsgt_01_13(i4 %x) { define i1 @ashrsgt_01_14(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_14( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -3 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1029,7 +1030,7 @@ define i1 @ashrsgt_01_14(i4 %x) { define i1 @ashrsgt_01_15(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_15( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1039,7 +1040,7 @@ define i1 @ashrsgt_01_15(i4 %x) { define i1 @ashrsgt_02_00(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_00( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 3 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1166,7 +1167,7 @@ define i1 @ashrsgt_02_13(i4 %x) { define i1 @ashrsgt_02_14(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_14( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -5 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1176,7 +1177,7 @@ define i1 @ashrsgt_02_14(i4 %x) { define i1 @ashrsgt_02_15(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_15( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1321,7 +1322,7 @@ define i1 @ashrsgt_03_14(i4 %x) { define i1 @ashrsgt_03_15(i4 %x) { ; CHECK-LABEL: @ashrsgt_03_15( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; 
%s = ashr i4 %x, 3 @@ -1331,7 +1332,7 @@ define i1 @ashrsgt_03_15(i4 %x) { define i1 @ashrslt_01_00(i4 %x) { ; CHECK-LABEL: @ashrslt_01_00( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1341,7 +1342,7 @@ define i1 @ashrslt_01_00(i4 %x) { define i1 @ashrslt_01_01(i4 %x) { ; CHECK-LABEL: @ashrslt_01_01( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1351,7 +1352,7 @@ define i1 @ashrslt_01_01(i4 %x) { define i1 @ashrslt_01_02(i4 %x) { ; CHECK-LABEL: @ashrslt_01_02( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1361,7 +1362,7 @@ define i1 @ashrslt_01_02(i4 %x) { define i1 @ashrslt_01_03(i4 %x) { ; CHECK-LABEL: @ashrslt_01_03( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1452,7 +1453,7 @@ define i1 @ashrslt_01_12(i4 %x) { define i1 @ashrslt_01_13(i4 %x) { ; CHECK-LABEL: @ashrslt_01_13( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1462,7 +1463,7 @@ define i1 @ashrslt_01_13(i4 %x) { define i1 @ashrslt_01_14(i4 %x) { ; CHECK-LABEL: @ashrslt_01_14( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1472,7 +1473,7 @@ define i1 @ashrslt_01_14(i4 %x) { define i1 @ashrslt_01_15(i4 %x) { ; CHECK-LABEL: @ashrslt_01_15( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 1 @@ -1482,7 +1483,7 @@ define i1 @ashrslt_01_15(i4 %x) { define i1 @ashrslt_02_00(i4 %x) { ; CHECK-LABEL: 
@ashrslt_02_00( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1492,7 +1493,7 @@ define i1 @ashrslt_02_00(i4 %x) { define i1 @ashrslt_02_01(i4 %x) { ; CHECK-LABEL: @ashrslt_02_01( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1619,7 +1620,7 @@ define i1 @ashrslt_02_14(i4 %x) { define i1 @ashrslt_02_15(i4 %x) { ; CHECK-LABEL: @ashrslt_02_15( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 2 @@ -1629,7 +1630,7 @@ define i1 @ashrslt_02_15(i4 %x) { define i1 @ashrslt_03_00(i4 %x) { ; CHECK-LABEL: @ashrslt_03_00( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i4 %x, 3 @@ -1774,7 +1775,7 @@ define i1 @ashrslt_03_15(i4 %x) { define i1 @lshrugt_01_00_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1784,7 +1785,7 @@ define i1 @lshrugt_01_00_exact(i4 %x) { define i1 @lshrugt_01_01_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 2 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1794,7 +1795,7 @@ define i1 @lshrugt_01_01_exact(i4 %x) { define i1 @lshrugt_01_02_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1804,7 +1805,7 @@ define i1 @lshrugt_01_02_exact(i4 %x) { define i1 @lshrugt_01_03_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_03_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt 
i4 %x, 6 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1814,7 +1815,7 @@ define i1 @lshrugt_01_03_exact(i4 %x) { define i1 @lshrugt_01_04_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_04_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -8 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -8 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1824,7 +1825,7 @@ define i1 @lshrugt_01_04_exact(i4 %x) { define i1 @lshrugt_01_05_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_05_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1834,7 +1835,7 @@ define i1 @lshrugt_01_05_exact(i4 %x) { define i1 @lshrugt_01_06_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_01_06_exact( -; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, -2 +; CHECK-NEXT: [[C:%.*]] = icmp eq i4 [[X:%.*]], -2 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -1925,7 +1926,7 @@ define i1 @lshrugt_01_15_exact(i4 %x) { define i1 @lshrugt_02_00_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_02_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -1935,7 +1936,7 @@ define i1 @lshrugt_02_00_exact(i4 %x) { define i1 @lshrugt_02_01_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_02_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -1945,7 +1946,7 @@ define i1 @lshrugt_02_01_exact(i4 %x) { define i1 @lshrugt_02_02_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_02_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -2072,7 +2073,7 @@ define i1 @lshrugt_02_15_exact(i4 %x) { define i1 @lshrugt_03_00_exact(i4 %x) { ; CHECK-LABEL: @lshrugt_03_00_exact( -; 
CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 3 @@ -2226,7 +2227,7 @@ define i1 @lshrult_01_00_exact(i4 %x) { define i1 @lshrult_01_01_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2236,7 +2237,7 @@ define i1 @lshrult_01_01_exact(i4 %x) { define i1 @lshrult_01_02_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2246,7 +2247,7 @@ define i1 @lshrult_01_02_exact(i4 %x) { define i1 @lshrult_01_03_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_03_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 6 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2256,7 +2257,7 @@ define i1 @lshrult_01_03_exact(i4 %x) { define i1 @lshrult_01_04_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_04_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2266,7 +2267,7 @@ define i1 @lshrult_01_04_exact(i4 %x) { define i1 @lshrult_01_05_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_05_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2276,7 +2277,7 @@ define i1 @lshrult_01_05_exact(i4 %x) { define i1 @lshrult_01_06_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_06_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2286,7 +2287,7 @@ define i1 @lshrult_01_06_exact(i4 %x) { define i1 @lshrult_01_07_exact(i4 %x) { ; 
CHECK-LABEL: @lshrult_01_07_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -2 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], -2 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 1 @@ -2377,7 +2378,7 @@ define i1 @lshrult_02_00_exact(i4 %x) { define i1 @lshrult_02_01_exact(i4 %x) { ; CHECK-LABEL: @lshrult_02_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -2387,7 +2388,7 @@ define i1 @lshrult_02_01_exact(i4 %x) { define i1 @lshrult_02_02_exact(i4 %x) { ; CHECK-LABEL: @lshrult_02_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -2397,7 +2398,7 @@ define i1 @lshrult_02_02_exact(i4 %x) { define i1 @lshrult_02_03_exact(i4 %x) { ; CHECK-LABEL: @lshrult_02_03_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 2 @@ -2524,7 +2525,7 @@ define i1 @lshrult_03_00_exact(i4 %x) { define i1 @lshrult_03_01_exact(i4 %x) { ; CHECK-LABEL: @lshrult_03_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], -8 ; CHECK-NEXT: ret i1 [[C]] ; %s = lshr exact i4 %x, 3 @@ -2660,7 +2661,7 @@ define i1 @lshrult_03_15_exact(i4 %x) { define i1 @ashrsgt_01_00_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2670,7 +2671,7 @@ define i1 @ashrsgt_01_00_exact(i4 %x) { define i1 @ashrsgt_01_01_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 2 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2680,7 +2681,7 @@ define i1 @ashrsgt_01_01_exact(i4 %x) { define i1 
@ashrsgt_01_02_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2771,7 +2772,7 @@ define i1 @ashrsgt_01_11_exact(i4 %x) { define i1 @ashrsgt_01_12_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_12_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], -8 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2781,7 +2782,7 @@ define i1 @ashrsgt_01_12_exact(i4 %x) { define i1 @ashrsgt_01_13_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_13_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2791,7 +2792,7 @@ define i1 @ashrsgt_01_13_exact(i4 %x) { define i1 @ashrsgt_01_14_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_14_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2801,7 +2802,7 @@ define i1 @ashrsgt_01_14_exact(i4 %x) { define i1 @ashrsgt_01_15_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_01_15_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -2811,7 +2812,7 @@ define i1 @ashrsgt_01_15_exact(i4 %x) { define i1 @ashrsgt_02_00_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -2938,7 +2939,7 @@ define i1 @ashrsgt_02_13_exact(i4 %x) { define i1 @ashrsgt_02_14_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_14_exact( -; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8 +; CHECK-NEXT: [[C:%.*]] = icmp ne i4 [[X:%.*]], -8 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -2948,7 +2949,7 @@ define i1 
@ashrsgt_02_14_exact(i4 %x) { define i1 @ashrsgt_02_15_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_02_15_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -3093,7 +3094,7 @@ define i1 @ashrsgt_03_14_exact(i4 %x) { define i1 @ashrsgt_03_15_exact(i4 %x) { ; CHECK-LABEL: @ashrsgt_03_15_exact( -; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X:%.*]], -1 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 3 @@ -3103,7 +3104,7 @@ define i1 @ashrsgt_03_15_exact(i4 %x) { define i1 @ashrslt_01_00_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3113,7 +3114,7 @@ define i1 @ashrslt_01_00_exact(i4 %x) { define i1 @ashrslt_01_01_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3123,7 +3124,7 @@ define i1 @ashrslt_01_01_exact(i4 %x) { define i1 @ashrslt_01_02_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_02_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3133,7 +3134,7 @@ define i1 @ashrslt_01_02_exact(i4 %x) { define i1 @ashrslt_01_03_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_03_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3224,7 +3225,7 @@ define i1 @ashrslt_01_12_exact(i4 %x) { define i1 @ashrslt_01_13_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_13_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -6 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 
%x, 1 @@ -3234,7 +3235,7 @@ define i1 @ashrslt_01_13_exact(i4 %x) { define i1 @ashrslt_01_14_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_14_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3244,7 +3245,7 @@ define i1 @ashrslt_01_14_exact(i4 %x) { define i1 @ashrslt_01_15_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_01_15_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -2 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 1 @@ -3254,7 +3255,7 @@ define i1 @ashrslt_01_15_exact(i4 %x) { define i1 @ashrslt_02_00_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_02_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -3264,7 +3265,7 @@ define i1 @ashrslt_02_00_exact(i4 %x) { define i1 @ashrslt_02_01_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_02_01_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -3391,7 +3392,7 @@ define i1 @ashrslt_02_14_exact(i4 %x) { define i1 @ashrslt_02_15_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_02_15_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], -4 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 2 @@ -3401,7 +3402,7 @@ define i1 @ashrslt_02_15_exact(i4 %x) { define i1 @ashrslt_03_00_exact(i4 %x) { ; CHECK-LABEL: @ashrslt_03_00_exact( -; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0 +; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr exact i4 %x, 3 From ae738641d53e06b30b1189aedaa1c3dddd8d371d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:15:57 +0000 Subject: [PATCH 014/254] [NFC][InstCombine] Autogenerate shift.ll test llvm-svn: 373800 --- 
llvm/test/Transforms/InstCombine/shift.ll | 228 +++++++++++----------- 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index a0e6bbe33ee82..501f015ed730e 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -21,7 +21,7 @@ define <4 x i32> @shl_non_splat_vector(<4 x i32> %A) { define i32 @test6(i32 %A) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6 +; CHECK-NEXT: [[C:%.*]] = mul i32 [[A:%.*]], 6 ; CHECK-NEXT: ret i32 [[C]] ; %B = shl i32 %A, 1 ;; convert to an mul instruction @@ -31,7 +31,7 @@ define i32 @test6(i32 %A) { define i32 @test6a(i32 %A) { ; CHECK-LABEL: @test6a( -; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6 +; CHECK-NEXT: [[C:%.*]] = mul i32 [[A:%.*]], 6 ; CHECK-NEXT: ret i32 [[C]] ; %B = mul i32 %A, 3 @@ -52,7 +52,7 @@ define i8 @test8(i8 %A) { ;; (A << 7) >> 7 === A & 1 define i8 @test9(i8 %A) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[B:%.*]] = and i8 %A, 1 +; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], 1 ; CHECK-NEXT: ret i8 [[B]] ; %B = shl i8 %A, 7 @@ -64,7 +64,7 @@ define i8 @test9(i8 %A) { define i8 @test10(i8 %A) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[B:%.*]] = and i8 %A, -128 +; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], -128 ; CHECK-NEXT: ret i8 [[B]] ; %B = lshr i8 %A, 7 @@ -75,7 +75,7 @@ define i8 @test10(i8 %A) { ;; Allow the simplification when the lshr shift is exact. define i8 @test10a(i8 %A) { ; CHECK-LABEL: @test10a( -; CHECK-NEXT: ret i8 %A +; CHECK-NEXT: ret i8 [[A:%.*]] ; %B = lshr exact i8 %A, 7 %C = shl i8 %B, 7 @@ -85,14 +85,14 @@ define i8 @test10a(i8 %A) { ;; This transformation is deferred to DAGCombine: ;; (A >> 3) << 4 === (A & 0x1F) << 1 ;; The shl may be valuable to scalar evolution. 
-define i8 @test11(i8 %A) { +define i8 @test11(i8 %x) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 3 ; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 3 ; CHECK-NEXT: [[C:%.*]] = shl i8 [[B]], 4 ; CHECK-NEXT: ret i8 [[C]] ; - %a = mul i8 %A, 3 + %a = mul i8 %x, 3 %B = lshr i8 %a, 3 %C = shl i8 %B, 4 ret i8 %C @@ -101,7 +101,7 @@ define i8 @test11(i8 %A) { ;; Allow the simplification in InstCombine when the lshr shift is exact. define i8 @test11a(i8 %A) { ; CHECK-LABEL: @test11a( -; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6 +; CHECK-NEXT: [[C:%.*]] = mul i8 [[A:%.*]], 6 ; CHECK-NEXT: ret i8 [[C]] ; %a = mul i8 %A, 3 @@ -114,8 +114,8 @@ define i8 @test11a(i8 %A) { ;; (A >> 8) << 8 === A & -256 define i32 @test12(i32 %A) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[B1:%.*]] = and i32 %A, -256 -; CHECK-NEXT: ret i32 [[B1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -256 +; CHECK-NEXT: ret i32 [[TMP1]] ; %B = ashr i32 %A, 8 %C = shl i32 %B, 8 @@ -141,14 +141,14 @@ define i8 @shishi(i8 %x) { ;; This transformation is deferred to DAGCombine: ;; (A >> 3) << 4 === (A & -8) * 2 ;; The shl may be valuable to scalar evolution. 
-define i8 @test13(i8 %A) { +define i8 @test13(i8 %x) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3 -; CHECK-NEXT: [[B1:%.*]] = lshr i8 [[A]], 3 -; CHECK-NEXT: [[C:%.*]] = shl i8 [[B1]], 4 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[A]], 3 +; CHECK-NEXT: [[C:%.*]] = shl i8 [[TMP1]], 4 ; CHECK-NEXT: ret i8 [[C]] ; - %a = mul i8 %A, 3 + %a = mul i8 %x, 3 %B = ashr i8 %a, 3 %C = shl i8 %B, 4 ret i8 %C @@ -156,7 +156,7 @@ define i8 @test13(i8 %A) { define i8 @test13a(i8 %A) { ; CHECK-LABEL: @test13a( -; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6 +; CHECK-NEXT: [[C:%.*]] = mul i8 [[A:%.*]], 6 ; CHECK-NEXT: ret i8 [[C]] ; %a = mul i8 %A, 3 @@ -168,7 +168,7 @@ define i8 @test13a(i8 %A) { ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14(i32 %A) { ; CHECK-LABEL: @test14( -; CHECK-NEXT: [[B:%.*]] = and i32 %A, -19760 +; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], -19760 ; CHECK-NEXT: [[C:%.*]] = or i32 [[B]], 19744 ; CHECK-NEXT: ret i32 [[C]] ; @@ -181,7 +181,7 @@ define i32 @test14(i32 %A) { ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4) define i32 @test14a(i32 %A) { ; CHECK-LABEL: @test14a( -; CHECK-NEXT: [[C:%.*]] = and i32 %A, 77 +; CHECK-NEXT: [[C:%.*]] = and i32 [[A:%.*]], 77 ; CHECK-NEXT: ret i32 [[C]] ; %B = shl i32 %A, 4 @@ -192,7 +192,7 @@ define i32 @test14a(i32 %A) { define i32 @test15(i1 %C) { ; CHECK-LABEL: @test15( -; CHECK-NEXT: [[A:%.*]] = select i1 %C, i32 12, i32 4 +; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], i32 12, i32 4 ; CHECK-NEXT: ret i32 [[A]] ; %A = select i1 %C, i32 3, i32 1 @@ -202,7 +202,7 @@ define i32 @test15(i1 %C) { define i32 @test15a(i1 %C) { ; CHECK-LABEL: @test15a( -; CHECK-NEXT: [[V:%.*]] = select i1 %C, i32 512, i32 128 +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 512, i32 128 ; CHECK-NEXT: ret i32 [[V]] ; %A = select i1 %C, i8 3, i8 1 @@ -213,7 +213,7 @@ define i32 @test15a(i1 %C) { define i1 @test16(i32 %X) { ; CHECK-LABEL: @test16( -; 
CHECK-NEXT: [[TMP_6:%.*]] = and i32 %X, 16 +; CHECK-NEXT: [[TMP_6:%.*]] = and i32 [[X:%.*]], 16 ; CHECK-NEXT: [[TMP_7:%.*]] = icmp ne i32 [[TMP_6]], 0 ; CHECK-NEXT: ret i1 [[TMP_7]] ; @@ -225,7 +225,7 @@ define i1 @test16(i32 %X) { define i1 @test17(i32 %A) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: [[B_MASK:%.*]] = and i32 %A, -8 +; CHECK-NEXT: [[B_MASK:%.*]] = and i32 [[A:%.*]], -8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B_MASK]], 9872 ; CHECK-NEXT: ret i1 [[C]] ; @@ -236,7 +236,7 @@ define i1 @test17(i32 %A) { define <2 x i1> @test17vec(<2 x i32> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> %A, +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -257,7 +257,7 @@ define i1 @test18(i8 %A) { define i1 @test19(i32 %A) { ; CHECK-LABEL: @test19( -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 %A, 4 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 4 ; CHECK-NEXT: ret i1 [[C]] ; %B = ashr i32 %A, 2 @@ -268,7 +268,7 @@ define i1 @test19(i32 %A) { define <2 x i1> @test19vec(<2 x i32> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> %A, +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -279,7 +279,7 @@ define <2 x i1> @test19vec(<2 x i32> %A) { ;; X >u ~4 define i1 @test19a(i32 %A) { ; CHECK-LABEL: @test19a( -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %A, -5 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A:%.*]], -5 ; CHECK-NEXT: ret i1 [[C]] ; %B = ashr i32 %A, 2 @@ -289,7 +289,7 @@ define i1 @test19a(i32 %A) { define <2 x i1> @test19a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> %A, +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -309,7 +309,7 @@ define i1 @test20(i8 %A) { define i1 @test21(i8 %A) { ; CHECK-LABEL: @test21( -; CHECK-NEXT: 
[[B_MASK:%.*]] = and i8 %A, 15 +; CHECK-NEXT: [[B_MASK:%.*]] = and i8 [[A:%.*]], 15 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 8 ; CHECK-NEXT: ret i1 [[C]] ; @@ -320,7 +320,7 @@ define i1 @test21(i8 %A) { define i1 @test22(i8 %A) { ; CHECK-LABEL: @test22( -; CHECK-NEXT: [[B_MASK:%.*]] = and i8 %A, 15 +; CHECK-NEXT: [[B_MASK:%.*]] = and i8 [[A:%.*]], 15 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 0 ; CHECK-NEXT: ret i1 [[C]] ; @@ -331,7 +331,7 @@ define i1 @test22(i8 %A) { define i8 @test23(i32 %A) { ; CHECK-LABEL: @test23( -; CHECK-NEXT: [[D:%.*]] = trunc i32 %A to i8 +; CHECK-NEXT: [[D:%.*]] = trunc i32 [[A:%.*]] to i8 ; CHECK-NEXT: ret i8 [[D]] ; ;; casts not needed @@ -343,7 +343,7 @@ define i8 @test23(i32 %A) { define i8 @test24(i8 %X) { ; CHECK-LABEL: @test24( -; CHECK-NEXT: [[Z:%.*]] = and i8 %X, 3 +; CHECK-NEXT: [[Z:%.*]] = and i8 [[X:%.*]], 3 ; CHECK-NEXT: ret i8 [[Z]] ; %Y = and i8 %X, -5 @@ -354,8 +354,8 @@ define i8 @test24(i8 %X) { define i32 @test25(i32 %tmp.2, i32 %AA) { ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[TMP_3:%.*]] = and i32 %tmp.2, -131072 -; CHECK-NEXT: [[X2:%.*]] = add i32 [[TMP_3]], %AA +; CHECK-NEXT: [[TMP_3:%.*]] = and i32 [[TMP_2:%.*]], -131072 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[TMP_3]], [[AA:%.*]] ; CHECK-NEXT: [[TMP_6:%.*]] = and i32 [[X2]], -131072 ; CHECK-NEXT: ret i32 [[TMP_6]] ; @@ -368,8 +368,8 @@ define i32 @test25(i32 %tmp.2, i32 %AA) { define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) { ; CHECK-LABEL: @test25_vector( -; CHECK-NEXT: [[TMP_3:%.*]] = and <2 x i32> %tmp.2, -; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[TMP_3]], %AA +; CHECK-NEXT: [[TMP_3:%.*]] = and <2 x i32> [[TMP_2:%.*]], +; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[TMP_3]], [[AA:%.*]] ; CHECK-NEXT: [[TMP_6:%.*]] = and <2 x i32> [[X2]], ; CHECK-NEXT: ret <2 x i32> [[TMP_6]] ; @@ -383,7 +383,7 @@ define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) { ;; handle casts between shifts. 
define i32 @test26(i32 %A) { ; CHECK-LABEL: @test26( -; CHECK-NEXT: [[B:%.*]] = and i32 %A, -2 +; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], -2 ; CHECK-NEXT: ret i32 [[B]] ; %B = lshr i32 %A, 1 @@ -395,7 +395,7 @@ define i32 @test26(i32 %A) { define i1 @test27(i32 %x) nounwind { ; CHECK-LABEL: @test27( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 8 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 8 ; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[Z]] ; @@ -406,7 +406,7 @@ define i1 @test27(i32 %x) nounwind { define i1 @test28(i8 %x) { ; CHECK-LABEL: @test28( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %shr = lshr i8 %x, 7 @@ -416,7 +416,7 @@ define i1 @test28(i8 %x) { define <2 x i1> @test28vec(<2 x i8> %x) { ; CHECK-LABEL: @test28vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> %x, @@ -427,13 +427,13 @@ define <2 x i1> @test28vec(<2 x i8> %x) { define i8 @test28a(i8 %x, i8 %y) { ; CHECK-LABEL: @test28a( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 %x, 7 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X:%.*]], 7 ; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[COND1]], label %bb2, label %bb1 +; CHECK-NEXT: br i1 [[COND1]], label [[BB2:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: ret i8 [[TMP1]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], %y +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret i8 [[TMP2]] ; entry: @@ -452,7 +452,7 @@ bb2: define i32 @test29(i64 %d18) { ; CHECK-LABEL: @test29( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP916:%.*]] = lshr i64 %d18, 63 +; CHECK-NEXT: [[TMP916:%.*]] = lshr i64 [[D18:%.*]], 63 ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP916]] to i32 ; CHECK-NEXT: ret i32 [[TMP10]] ; @@ -466,8 +466,8 @@ 
entry: define i32 @test30(i32 %A, i32 %B, i32 %C) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[X1:%.*]] = and i32 %A, %B -; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X1]], %C +; CHECK-NEXT: [[X1:%.*]] = and i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X1]], [[C:%.*]] ; CHECK-NEXT: ret i32 [[Z]] ; %X = shl i32 %A, %C @@ -478,8 +478,8 @@ define i32 @test30(i32 %A, i32 %B, i32 %C) { define i32 @test31(i32 %A, i32 %B, i32 %C) { ; CHECK-LABEL: @test31( -; CHECK-NEXT: [[X1:%.*]] = or i32 %A, %B -; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[X1]], %C +; CHECK-NEXT: [[X1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[X1]], [[C:%.*]] ; CHECK-NEXT: ret i32 [[Z]] ; %X = lshr i32 %A, %C @@ -490,8 +490,8 @@ define i32 @test31(i32 %A, i32 %B, i32 %C) { define i32 @test32(i32 %A, i32 %B, i32 %C) { ; CHECK-LABEL: @test32( -; CHECK-NEXT: [[X1:%.*]] = xor i32 %A, %B -; CHECK-NEXT: [[Z:%.*]] = ashr i32 [[X1]], %C +; CHECK-NEXT: [[X1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Z:%.*]] = ashr i32 [[X1]], [[C:%.*]] ; CHECK-NEXT: ret i32 [[Z]] ; %X = ashr i32 %A, %C @@ -502,7 +502,7 @@ define i32 @test32(i32 %A, i32 %B, i32 %C) { define i1 @test33(i32 %X) { ; CHECK-LABEL: @test33( -; CHECK-NEXT: [[TMP1_MASK:%.*]] = and i32 %X, 16777216 +; CHECK-NEXT: [[TMP1_MASK:%.*]] = and i32 [[X:%.*]], 16777216 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1_MASK]], 0 ; CHECK-NEXT: ret i1 [[TMP2]] ; @@ -513,7 +513,7 @@ define i1 @test33(i32 %X) { define <2 x i1> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[TMP1_MASK:%.*]] = and <2 x i32> %X, +; CHECK-NEXT: [[TMP1_MASK:%.*]] = and <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -533,7 +533,7 @@ define i1 @test34(i32 %X) { define i1 @test35(i32 %X) { ; CHECK-LABEL: @test35( -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 %X, 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 
[[TMP2]] ; %tmp1 = ashr i32 %X, 7 @@ -543,7 +543,7 @@ define i1 @test35(i32 %X) { define <2 x i1> @test35vec(<2 x i32> %X) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> %X, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp1 = ashr <2 x i32> %X, @@ -553,7 +553,7 @@ define <2 x i1> @test35vec(<2 x i32> %X) { define i128 @test36(i128 %A, i128 %B) { ; CHECK-LABEL: @test36( -; CHECK-NEXT: [[TMP231:%.*]] = or i128 %B, %A +; CHECK-NEXT: [[TMP231:%.*]] = or i128 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[INS:%.*]] = and i128 [[TMP231]], 18446744073709551615 ; CHECK-NEXT: ret i128 [[INS]] ; @@ -566,9 +566,9 @@ define i128 @test36(i128 %A, i128 %B) { define i64 @test37(i128 %A, i32 %B) { ; CHECK-LABEL: @test37( -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 %B to i128 +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[B:%.*]] to i128 ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i128 [[TMP22]], 32 -; CHECK-NEXT: [[INS:%.*]] = or i128 [[TMP23]], %A +; CHECK-NEXT: [[INS:%.*]] = or i128 [[TMP23]], [[A:%.*]] ; CHECK-NEXT: [[TMP46:%.*]] = trunc i128 [[INS]] to i64 ; CHECK-NEXT: ret i64 [[TMP46]] ; @@ -583,7 +583,7 @@ define i64 @test37(i128 %A, i32 %B) { define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_splat_vec( -; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> %x to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -594,7 +594,7 @@ define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) { define i32 @test38(i32 %x) nounwind readnone { ; CHECK-LABEL: @test38( -; CHECK-NEXT: [[REM1:%.*]] = and i32 %x, 31 +; CHECK-NEXT: [[REM1:%.*]] = and i32 [[X:%.*]], 31 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[REM1]] ; CHECK-NEXT: ret i32 [[SHL]] ; @@ -607,7 +607,7 @@ define i32 @test38(i32 %x) nounwind readnone { define i8 @test39(i32 %a0) { ; CHECK-LABEL: 
@test39( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 %a0 to i8 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[A0:%.*]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = shl i8 [[TMP4]], 5 ; CHECK-NEXT: [[TMP49:%.*]] = shl i8 [[TMP4]], 6 ; CHECK-NEXT: [[TMP50:%.*]] = and i8 [[TMP49]], 64 @@ -634,8 +634,8 @@ entry: ; PR9809 define i32 @test40(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: @test40( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 2 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 %a, [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], 2 +; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret i32 [[DIV]] ; %shl1 = shl i32 1, %b @@ -646,7 +646,7 @@ define i32 @test40(i32 %a, i32 %b) nounwind { define i32 @test41(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: @test41( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 8, %b +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 8, [[B:%.*]] ; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = shl i32 1, %b @@ -656,8 +656,8 @@ define i32 @test41(i32 %a, i32 %b) nounwind { define i32 @test42(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: @test42( -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, %b -; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 %a, [[DIV]] +; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, [[B:%.*]] +; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]] ; CHECK-NEXT: ret i32 [[DIV2]] ; %div = lshr i32 4096, %b ; must be exact otherwise we'd divide by zero @@ -667,8 +667,8 @@ define i32 @test42(i32 %a, i32 %b) nounwind { define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[DIV:%.*]] = lshr exact <2 x i32> , %b -; CHECK-NEXT: [[DIV2:%.*]] = udiv <2 x i32> %a, [[DIV]] +; CHECK-NEXT: [[DIV:%.*]] = lshr exact <2 x i32> , [[B:%.*]] +; CHECK-NEXT: [[DIV2:%.*]] = udiv <2 x i32> [[A:%.*]], [[DIV]] ; CHECK-NEXT: ret <2 x i32> [[DIV2]] ; %div = lshr <2 x i32> , %b ; must be exact otherwise we'd divide by zero @@ -678,8 +678,8 @@ define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) { define i32 @test43(i32 %a, 
i32 %b) nounwind { ; CHECK-LABEL: @test43( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 12 -; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 %a, [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], 12 +; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret i32 [[DIV2]] ; %div = shl i32 4096, %b ; must be exact otherwise we'd divide by zero @@ -689,7 +689,7 @@ define i32 @test43(i32 %a, i32 %b) nounwind { define i32 @test44(i32 %a) nounwind { ; CHECK-LABEL: @test44( -; CHECK-NEXT: [[Y:%.*]] = shl i32 %a, 5 +; CHECK-NEXT: [[Y:%.*]] = shl i32 [[A:%.*]], 5 ; CHECK-NEXT: ret i32 [[Y]] ; %y = shl nuw i32 %a, 1 @@ -699,7 +699,7 @@ define i32 @test44(i32 %a) nounwind { define i32 @test45(i32 %a) nounwind { ; CHECK-LABEL: @test45( -; CHECK-NEXT: [[Y:%.*]] = lshr i32 %a, 5 +; CHECK-NEXT: [[Y:%.*]] = lshr i32 [[A:%.*]], 5 ; CHECK-NEXT: ret i32 [[Y]] ; %y = lshr exact i32 %a, 1 @@ -711,7 +711,7 @@ define i32 @test45(i32 %a) nounwind { define i32 @test46(i32 %a) { ; CHECK-LABEL: @test46( -; CHECK-NEXT: [[Z:%.*]] = ashr exact i32 %a, 2 +; CHECK-NEXT: [[Z:%.*]] = ashr exact i32 [[A:%.*]], 2 ; CHECK-NEXT: ret i32 [[Z]] ; %y = ashr exact i32 %a, 3 @@ -723,7 +723,7 @@ define i32 @test46(i32 %a) { define <2 x i32> @test46_splat_vec(<2 x i32> %a) { ; CHECK-LABEL: @test46_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> %a, +; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %y = ashr exact <2 x i32> %a, @@ -735,7 +735,7 @@ define <2 x i32> @test46_splat_vec(<2 x i32> %a) { define i8 @test47(i8 %a) { ; CHECK-LABEL: @test47( -; CHECK-NEXT: [[Z:%.*]] = lshr exact i8 %a, 2 +; CHECK-NEXT: [[Z:%.*]] = lshr exact i8 [[A:%.*]], 2 ; CHECK-NEXT: ret i8 [[Z]] ; %y = lshr exact i8 %a, 3 @@ -747,7 +747,7 @@ define i8 @test47(i8 %a) { define <2 x i8> @test47_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @test47_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> %a, +; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], ; 
CHECK-NEXT: ret <2 x i8> [[Z]] ; %y = lshr exact <2 x i8> %a, @@ -759,7 +759,7 @@ define <2 x i8> @test47_splat_vec(<2 x i8> %a) { define i32 @test48(i32 %x) { ; CHECK-LABEL: @test48( -; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = lshr exact i32 %x, 1 @@ -771,7 +771,7 @@ define i32 @test48(i32 %x) { define i32 @test48_nuw_nsw(i32 %x) { ; CHECK-LABEL: @test48_nuw_nsw( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = lshr exact i32 %x, 1 @@ -783,7 +783,7 @@ define i32 @test48_nuw_nsw(i32 %x) { define <2 x i32> @test48_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test48_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = lshr exact <2 x i32> %x, @@ -795,7 +795,7 @@ define <2 x i32> @test48_splat_vec(<2 x i32> %x) { define i32 @test49(i32 %x) { ; CHECK-LABEL: @test49( -; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = ashr exact i32 %x, 1 @@ -807,7 +807,7 @@ define i32 @test49(i32 %x) { define i32 @test49_nuw_nsw(i32 %x) { ; CHECK-LABEL: @test49_nuw_nsw( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = ashr exact i32 %x, 1 @@ -819,7 +819,7 @@ define i32 @test49_nuw_nsw(i32 %x) { define <2 x i32> @test49_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test49_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = ashr exact <2 x i32> %x, @@ -831,7 +831,7 @@ define <2 x i32> @test49_splat_vec(<2 x i32> %x) { define i32 @test50(i32 %x) { ; CHECK-LABEL: @test50( -; CHECK-NEXT: [[B:%.*]] = ashr i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = ashr i32 
[[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = shl nsw i32 %x, 1 @@ -844,7 +844,7 @@ define i32 @test50(i32 %x) { define <2 x i32> @test50_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test50_splat_vec( -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -856,7 +856,7 @@ define <2 x i32> @test50_splat_vec(<2 x i32> %x) { define i32 @test51(i32 %x) { ; CHECK-LABEL: @test51( -; CHECK-NEXT: [[B:%.*]] = lshr i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = lshr i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = shl nuw i32 %x, 1 @@ -869,7 +869,7 @@ define i32 @test51(i32 %x) { define <2 x i32> @test51_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -882,7 +882,7 @@ define <2 x i32> @test51_splat_vec(<2 x i32> %x) { define i32 @test51_no_nuw(i32 %x) { ; CHECK-LABEL: @test51_no_nuw( -; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 %x, 2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2 ; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP1]], 536870911 ; CHECK-NEXT: ret i32 [[B]] ; @@ -895,7 +895,7 @@ define i32 @test51_no_nuw(i32 %x) { define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> %x, +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; @@ -908,7 +908,7 @@ define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) { define i32 @test52(i32 %x) { ; CHECK-LABEL: @test52( -; CHECK-NEXT: [[B:%.*]] = shl nsw i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl nsw i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = shl nsw i32 %x, 3 @@ -920,7 +920,7 @@ define i32 @test52(i32 %x) { define <2 x i32> @test52_splat_vec(<2 x 
i32> %x) { ; CHECK-LABEL: @test52_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -932,7 +932,7 @@ define <2 x i32> @test52_splat_vec(<2 x i32> %x) { define i32 @test53(i32 %x) { ; CHECK-LABEL: @test53( -; CHECK-NEXT: [[B:%.*]] = shl nuw i32 %x, 2 +; CHECK-NEXT: [[B:%.*]] = shl nuw i32 [[X:%.*]], 2 ; CHECK-NEXT: ret i32 [[B]] ; %A = shl nuw i32 %x, 3 @@ -944,7 +944,7 @@ define i32 @test53(i32 %x) { define <2 x i32> @test53_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test53_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw <2 x i32> %x, +; CHECK-NEXT: [[B:%.*]] = shl nuw <2 x i32> [[X:%.*]], ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -956,7 +956,7 @@ define <2 x i32> @test53_splat_vec(<2 x i32> %x) { define i8 @test53_no_nuw(i8 %x) { ; CHECK-LABEL: @test53_no_nuw( -; CHECK-NEXT: [[TMP1:%.*]] = shl i8 %x, 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 2 ; CHECK-NEXT: [[B:%.*]] = and i8 [[TMP1]], 124 ; CHECK-NEXT: ret i8 [[B]] ; @@ -969,7 +969,7 @@ define i8 @test53_no_nuw(i8 %x) { define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @test53_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> %x, +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[B]] ; @@ -980,7 +980,7 @@ define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { define i32 @test54(i32 %x) { ; CHECK-LABEL: @test54( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP1]], 16 ; CHECK-NEXT: ret i32 [[AND]] ; @@ -992,7 +992,7 @@ define i32 @test54(i32 %x) { define <2 x i32> @test54_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test54_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %x, +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[AND:%.*]] = 
and <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[AND]] ; @@ -1004,7 +1004,7 @@ define <2 x i32> @test54_splat_vec(<2 x i32> %x) { define i32 @test55(i32 %x) { ; CHECK-LABEL: @test55( -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 3 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 8 ; CHECK-NEXT: ret i32 [[OR]] ; @@ -1016,7 +1016,7 @@ define i32 @test55(i32 %x) { define i32 @test56(i32 %x) { ; CHECK-LABEL: @test56( -; CHECK-NEXT: [[SHR2:%.*]] = lshr i32 %x, 1 +; CHECK-NEXT: [[SHR2:%.*]] = lshr i32 [[X:%.*]], 1 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR2]], 4 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7 ; CHECK-NEXT: ret i32 [[OR]] @@ -1029,8 +1029,8 @@ define i32 @test56(i32 %x) { define i32 @test57(i32 %x) { ; CHECK-LABEL: @test57( -; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 %x, 1 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR1]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP1]], 4 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7 ; CHECK-NEXT: ret i32 [[OR]] ; @@ -1042,7 +1042,7 @@ define i32 @test57(i32 %x) { define i32 @test58(i32 %x) { ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 %x, 3 +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 3 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 1 ; CHECK-NEXT: ret i32 [[OR]] ; @@ -1054,7 +1054,7 @@ define i32 @test58(i32 %x) { define <2 x i32> @test58_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test58_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> %x, +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i32> [[OR]] ; @@ -1066,7 +1066,7 @@ define <2 x i32> @test58_splat_vec(<2 x i32> %x) { define i32 @test59(i32 %x) { ; CHECK-LABEL: @test59( -; CHECK-NEXT: [[SHR:%.*]] = ashr i32 %x, 4 +; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[X:%.*]], 4 ; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SHR]], 1 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 2 ; CHECK-NEXT: ret 
i32 [[OR]] @@ -1080,7 +1080,7 @@ define i32 @test59(i32 %x) { ; propagate "exact" trait define i32 @test60(i32 %x) { ; CHECK-LABEL: @test60( -; CHECK-NEXT: [[SHL:%.*]] = ashr exact i32 %x, 3 +; CHECK-NEXT: [[SHL:%.*]] = ashr exact i32 [[X:%.*]], 3 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 1 ; CHECK-NEXT: ret i32 [[OR]] ; @@ -1094,19 +1094,19 @@ define i32 @test60(i32 %x) { define void @test61(i128 %arg) { ; CHECK-LABEL: @test61( ; CHECK-NEXT: bb: -; CHECK-NEXT: br i1 undef, label %bb1, label %bb12 +; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB12:%.*]] ; CHECK: bb1: -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: br i1 undef, label %bb3, label %bb7 +; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB7:%.*]] ; CHECK: bb3: -; CHECK-NEXT: br label %bb8 +; CHECK-NEXT: br label [[BB8:%.*]] ; CHECK: bb7: -; CHECK-NEXT: br i1 undef, label %bb8, label %bb2 +; CHECK-NEXT: br i1 undef, label [[BB8]], label [[BB2]] ; CHECK: bb8: -; CHECK-NEXT: br i1 undef, label %bb11, label %bb12 +; CHECK-NEXT: br i1 undef, label [[BB11:%.*]], label [[BB12]] ; CHECK: bb11: -; CHECK-NEXT: br i1 undef, label %bb1, label %bb12 +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB12]] ; CHECK: bb12: ; CHECK-NEXT: ret void ; @@ -1159,7 +1159,7 @@ define <4 x i32> @test62_splat_vector(<4 x i32> %a) { define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) { ; CHECK-LABEL: @test62_non_splat_vector( -; CHECK-NEXT: [[B:%.*]] = ashr <4 x i32> %a, +; CHECK-NEXT: [[B:%.*]] = ashr <4 x i32> [[A:%.*]], ; CHECK-NEXT: ret <4 x i32> [[B]] ; %b = ashr <4 x i32> %a, ; shift all bits out @@ -1168,7 +1168,7 @@ define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) { define <2 x i65> @test_63(<2 x i64> %t) { ; CHECK-LABEL: @test_63( -; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> %t to <2 x i65> +; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> [[T:%.*]] to <2 x i65> ; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], ; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> 
[[SEXT]], ; CHECK-NEXT: ret <2 x i65> [[B]] @@ -1294,7 +1294,7 @@ define i64 @shl_zext_mul_extra_use2(i32 %t) { define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @ashr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, @@ -1304,7 +1304,7 @@ define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @lshr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, From f304d4d185d24eec9f0ca84d566da0d51b2344ef Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:16:11 +0000 Subject: [PATCH 015/254] [InstCombine] Right-shift shift amount reassociation with truncation (PR43564, PR42391) Initially (D65380) i believed that if we have rightshift-trunc-rightshift, we can't do any folding. But as it usually happens, i was wrong. https://rise4fun.com/Alive/GEw https://rise4fun.com/Alive/gN2O In https://bugs.llvm.org/show_bug.cgi?id=43564 we happen to have this very sequence, of two right shifts separated by trunc. And "just" so that happens, we apparently can fold the pattern if the total shift amount is either 0, or it's equal to the bitwidth of the innermost widest shift - i.e. if we are left with only the original sign bit. Which is exactly what is wanted there. 
llvm-svn: 373801 --- .../InstCombine/InstCombineShifts.cpp | 34 ++++++------ ...ount-reassociation-with-truncation-ashr.ll | 53 ++++++------------- ...ount-reassociation-with-truncation-lshr.ll | 53 ++++++------------- ...-test-via-right-shifting-all-other-bits.ll | 16 ++---- 4 files changed, 52 insertions(+), 104 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9d96ddc4040d8..8ab4aeb38beaa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -61,16 +61,10 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, if (ShiftOpcode != Sh1->getOpcode()) return nullptr; - // Did we match a pattern with truncation ? - if (Trunc) { - // For right-shifts we can't do any such simplifications. Leave as-is. - if (ShiftOpcode != Instruction::BinaryOps::Shl) - return nullptr; // FIXME: still could perform constant-folding. - // If we saw truncation, we'll need to produce extra instruction, - // and for that one of the operands of the shift must be one-use. - if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) - return nullptr; - } + // If we saw truncation, we'll need to produce extra instruction, + // and for that one of the operands of the shift must be one-use. + if (Trunc && !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; // Can we fold (ShAmt0+ShAmt1) ? auto *NewShAmt = dyn_cast_or_null( @@ -78,13 +72,23 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, SQ.getWithInstruction(Sh0))); if (!NewShAmt) return nullptr; // Did not simplify. - // Is the new shift amount smaller than the bit width of inner shift? 
- if (!match(NewShAmt, m_SpecificInt_ICMP( - ICmpInst::Predicate::ICMP_ULT, - APInt(NewShAmt->getType()->getScalarSizeInBits(), - X->getType()->getScalarSizeInBits())))) + unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits(); + unsigned XBitWidth = X->getType()->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width of inner/new shift? + if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, + APInt(NewShAmtBitWidth, XBitWidth)))) return nullptr; // FIXME: could perform constant-folding. + // If there was a truncation, and we have a right-shift, we can only fold if + // we are left with the original sign bit. + // FIXME: zero shift amount is also legal here, but we can't *easily* check + // more than one predicate so it's not really worth it. + if (Trunc && ShiftOpcode != Instruction::BinaryOps::Shl && + !match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(NewShAmtBitWidth, XBitWidth - 1)))) + return nullptr; + // All good, we can do this fold. 
NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType()); diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index d9571f250ccfa..a0175387d1cba 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -12,12 +12,8 @@ define i16 @t0(i32 %x, i16 %y) { ; CHECK-LABEL: @t0( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 -; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -33,12 +29,8 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = ashr <2 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = ashr <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y @@ -52,12 +44,8 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; 
CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -71,12 +59,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -90,12 +74,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -118,9 +98,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { ; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 ; 
CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] ; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 ; CHECK-NEXT: call void @use16(i16 [[T3]]) -; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -135,13 +115,10 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { define i16 @t7_extrause1(i32 %x, i16 %y) { ; CHECK-LABEL: @t7_extrause1( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1 ; CHECK-NEXT: call void @use16(i16 [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index 3eae56d52d5dd..7b9962eacb117 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -12,12 +12,8 @@ define i16 @t0(i32 %x, i16 %y) { ; CHECK-LABEL: @t0( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 -; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: 
ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -33,12 +29,8 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = lshr <2 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y @@ -52,12 +44,8 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -71,12 +59,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; 
CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -90,12 +74,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> -; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> -; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], -; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -118,9 +98,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { ; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 ; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] ; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 ; CHECK-NEXT: call void @use16(i16 [[T3]]) -; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -135,13 +115,10 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { define i16 @t7_extrause1(i32 %x, i16 %y) { ; CHECK-LABEL: @t7_extrause1( -; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] -; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]] -; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1 ; CHECK-NEXT: call void @use16(i16 [[T4]]) -; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr 
i32 [[X:%.*]], 31 +; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y diff --git a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll index da744ebbbeead..fa5cc4349cf50 100644 --- a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll +++ b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll @@ -16,13 +16,7 @@ define i1 @highest_bit_test_via_lshr(i32 %data, i32 %nbits) { define i1 @highest_bit_test_via_lshr_with_truncation(i64 %data, i32 %nbits) { ; CHECK-LABEL: @highest_bit_test_via_lshr_with_truncation( -; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] -; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64 -; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]] -; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED]] to i32 -; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1 -; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]] -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA:%.*]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits @@ -50,12 +44,8 @@ define i1 @highest_bit_test_via_ashr(i32 %data, i32 %nbits) { define i1 @highest_bit_test_via_ashr_with_truncation(i64 %data, i32 %nbits) { ; CHECK-LABEL: @highest_bit_test_via_ashr_with_truncation( -; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] -; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64 -; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = ashr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]] -; CHECK-NEXT: 
[[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED]] to i32 -; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1 -; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA:%.*]], 63 +; CHECK-NEXT: [[SIGNBIT:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; From fb5af8b9b93cb270fda7832e24cab9cb62dec16f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 4 Oct 2019 22:16:22 +0000 Subject: [PATCH 016/254] [InstCombine] Fold 'icmp eq/ne (?trunc (lshr/ashr %x, bitwidth(x)-1)), 0' -> 'icmp sge/slt %x, 0' We do indeed already get it right in some cases, but only transitively, with one-use restrictions. Since we only need to produce a single comparison, it makes sense to match the pattern directly: https://rise4fun.com/Alive/kPg llvm-svn: 373802 --- .../InstCombine/InstCombineCompares.cpp | 28 +++++++++++++++++++ llvm/test/Transforms/InstCombine/shift.ll | 4 +-- ...-test-via-right-shifting-all-other-bits.ll | 18 +++++------- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index ddc7de39d8d2a..f07f64e3f02ea 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1384,6 +1384,29 @@ Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); } +/// Fold equality-comparison between zero and any (maybe truncated) right-shift +/// by one-less-than-bitwidth into a sign test on the original value. 
+Instruction *foldSignBitTest(ICmpInst &I) { + ICmpInst::Predicate Pred; + Value *X; + Constant *C; + if (!I.isEquality() || + !match(&I, m_ICmp(Pred, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))), + m_Zero()))) + return nullptr; + + Type *XTy = X->getType(); + unsigned XBitWidth = XTy->getScalarSizeInBits(); + if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(XBitWidth, XBitWidth - 1)))) + return nullptr; + + return ICmpInst::Create(Instruction::ICmp, + Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE + : ICmpInst::ICMP_SLT, + X, ConstantInt::getNullValue(XTy)); +} + // Handle icmp pred X, 0 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); @@ -5449,6 +5472,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; + // Try to match comparison as a sign bit test. Intentionally do this after + // foldICmpInstWithConstant() to potentially let other folds to happen first. 
+ if (Instruction *New = foldSignBitTest(I)) + return New; + if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 501f015ed730e..9ded69ad7b900 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -428,8 +428,8 @@ define i8 @test28a(i8 %x, i8 %y) { ; CHECK-LABEL: @test28a( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X:%.*]], 7 -; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[COND1]], label [[BB2:%.*]], label [[BB1:%.*]] +; CHECK-NEXT: [[COND1:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: br i1 [[COND1]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: ret i8 [[TMP1]] ; CHECK: bb2: diff --git a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll index fa5cc4349cf50..c6507afab1f38 100644 --- a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll +++ b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll @@ -44,9 +44,7 @@ define i1 @highest_bit_test_via_ashr(i32 %data, i32 %nbits) { define i1 @highest_bit_test_via_ashr_with_truncation(i64 %data, i32 %nbits) { ; CHECK-LABEL: @highest_bit_test_via_ashr_with_truncation( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA:%.*]], 63 -; CHECK-NEXT: [[SIGNBIT:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA:%.*]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits @@ -75,7 +73,7 @@ define i1 @unsigned_sign_bit_extract_extrause(i32 %x) { ; CHECK-LABEL: @unsigned_sign_bit_extract_extrause( ; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[X:%.*]], 31 ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: 
[[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signbit = lshr i32 %x, 31 @@ -87,7 +85,7 @@ define i1 @unsigned_sign_bit_extract_extrause__ispositive(i32 %x) { ; CHECK-LABEL: @unsigned_sign_bit_extract_extrause__ispositive( ; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[X:%.*]], 31 ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp eq i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp sgt i32 [[X]], -1 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signbit = lshr i32 %x, 31 @@ -108,7 +106,7 @@ define i1 @signed_sign_bit_extract_extrause(i32 %x) { ; CHECK-LABEL: @signed_sign_bit_extract_extrause( ; CHECK-NEXT: [[SIGNSMEAR:%.*]] = ashr i32 [[X:%.*]], 31 ; CHECK-NEXT: call void @use32(i32 [[SIGNSMEAR]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signsmear = ashr i32 %x, 31 @@ -132,7 +130,7 @@ define i1 @unsigned_sign_bit_extract_with_trunc_extrause(i64 %x) { ; CHECK-NEXT: call void @use64(i64 [[SIGNBIT]]) ; CHECK-NEXT: [[SIGNBIT_NARROW:%.*]] = trunc i64 [[SIGNBIT]] to i32 ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT_NARROW]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT_NARROW]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signbit = lshr i64 %x, 63 @@ -144,9 +142,7 @@ define i1 @unsigned_sign_bit_extract_with_trunc_extrause(i64 %x) { } define i1 @signed_sign_bit_extract_trunc(i64 %x) { ; CHECK-LABEL: @signed_sign_bit_extract_trunc( -; CHECK-NEXT: [[SIGNSMEAR:%.*]] = ashr i64 [[X:%.*]], 63 -; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc i64 [[SIGNSMEAR]] to i32 -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR_NARROW]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signsmear = ashr i64 %x, 63 @@ -160,7 +156,7 @@ define i1 
@signed_sign_bit_extract_trunc_extrause(i64 %x) { ; CHECK-NEXT: call void @use64(i64 [[SIGNSMEAR]]) ; CHECK-NEXT: [[SIGNSMEAR_NARROW:%.*]] = trunc i64 [[SIGNSMEAR]] to i32 ; CHECK-NEXT: call void @use32(i32 [[SIGNSMEAR_NARROW]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNSMEAR_NARROW]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[X]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %signsmear = ashr i64 %x, 63 From 5042882698840ba723360a7149091e1fc8565e34 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 4 Oct 2019 22:21:32 +0000 Subject: [PATCH 017/254] [MachO] Move nlist parsing into helper function (NFC) llvm-svn: 373803 --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 508a7325353c7..81b7397a87658 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2038,6 +2038,18 @@ UUID ObjectFileMachO::GetSharedCacheUUID(FileSpec dyld_shared_cache, return dsc_uuid; } +bool ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, + size_t nlist_byte_size, struct nlist_64 &nlist) { + if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) + return false; + nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); + nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); + nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); + nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); + nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); + return true; +} + size_t ObjectFileMachO::ParseSymtab() { static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); Timer scoped_timer(func_cat, "ObjectFileMachO::ParseSymtab () module = %s", @@ -2734,21 +2746,9 @@ size_t ObjectFileMachO::ParseSymtab() { 
///////////////////////////// { struct nlist_64 nlist; - if (!dsc_local_symbols_data.ValidOffsetForDataOfSize( - nlist_data_offset, nlist_byte_size)) + if (!ParseNList(dsc_local_symbols_data, nlist_data_offset, nlist_byte_size, nlist) break; - nlist.n_strx = dsc_local_symbols_data.GetU32_unchecked( - &nlist_data_offset); - nlist.n_type = dsc_local_symbols_data.GetU8_unchecked( - &nlist_data_offset); - nlist.n_sect = dsc_local_symbols_data.GetU8_unchecked( - &nlist_data_offset); - nlist.n_desc = dsc_local_symbols_data.GetU16_unchecked( - &nlist_data_offset); - nlist.n_value = dsc_local_symbols_data.GetAddress_unchecked( - &nlist_data_offset); - SymbolType type = eSymbolTypeInvalid; const char *symbol_name = dsc_local_symbols_data.PeekCStr( string_table_offset + nlist.n_strx); @@ -3681,16 +3681,9 @@ size_t ObjectFileMachO::ParseSymtab() { SymbolIndexToName reexport_shlib_needs_fixup; for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { struct nlist_64 nlist; - if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, - nlist_byte_size)) + if (!ParseNList(nlist_data, nlist_data_offset, nlist_byte_size, nlist)) break; - nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); - nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); - nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); - nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); - nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); - SymbolType type = eSymbolTypeInvalid; const char *symbol_name = nullptr; From cd5cd7d14c7ab0a24d40dc3e6588dc817565392c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 4 Oct 2019 22:21:35 +0000 Subject: [PATCH 018/254] [test] Run TestLaunchWithShellExpand with /bin/sh on POSIX. Now that we do shell expansion on POSIX with the user's shel, this test can potentially fail. This should ensure that we always use /bin/sh. 
llvm-svn: 373804 --- .../launch-with-shellexpand/TestLaunchWithShellExpand.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py b/lldb/packages/Python/lldbsuite/test/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py index 1ba812159fbea..64a2894069b35 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py +++ b/lldb/packages/Python/lldbsuite/test/commands/process/launch-with-shellexpand/TestLaunchWithShellExpand.py @@ -2,6 +2,7 @@ Test that argdumper is a viable launching strategy. """ from __future__ import print_function +import os import lldb @@ -36,6 +37,9 @@ def test(self): 'break here', lldb.SBFileSpec("main.cpp", False)) self.assertTrue(breakpoint, VALID_BREAKPOINT) + # Ensure we do the expansion with /bin/sh on POSIX. + os.environ["SHELL"] = '/bin/sh' + self.runCmd( "process launch -X true -w %s -- fi*.tx? () > <" % (self.getSourceDir())) From 67cfa79c01002d57ad8267ca1df6425cb67331bf Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 4 Oct 2019 22:24:21 +0000 Subject: [PATCH 019/254] Revert [CodeGen] Do the Simple Early Return in block-placement pass to optimize the blocks This reverts r371177 (git commit f879c6875563c0a8cd838f1e13b14dd33558f1f8) It caused PR43566 by removing empty, address-taken MachineBasicBlocks. Such blocks may have references from blockaddress or other operands, and need more consideration to be removed. See the PR for a test case to use when relanding. 
llvm-svn: 373805 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 46 ------------------- llvm/test/CodeGen/PowerPC/block-placement.mir | 12 +++-- 2 files changed, 8 insertions(+), 50 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 27438ecf0adc8..ac19bc0bd8ea2 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -38,7 +38,6 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -2713,7 +2712,6 @@ void MachineBlockPlacement::optimizeBranches() { // cannot because all branches may not be analyzable. // E.g., the target may be able to remove an unconditional branch to // a fallthrough when it occurs after predicated terminators. - SmallVector EmptyBB; for (MachineBasicBlock *ChainBB : FunctionChain) { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. @@ -2733,50 +2731,9 @@ void MachineBlockPlacement::optimizeBranches() { TII->removeBranch(*ChainBB); TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); ChainBB->updateTerminator(); - } else if (Cond.empty() && TBB && ChainBB != TBB && !TBB->empty() && - !TBB->canFallThrough()) { - // When ChainBB is unconditional branch to the TBB, and TBB has no - // fallthrough predecessor and fallthrough successor, try to merge - // ChainBB and TBB. This is legal under the one of following conditions: - // 1. ChainBB is empty except for an unconditional branch. - // 2. TBB has only one predecessor. 
- MachineFunction::iterator I(TBB); - if (((TBB == &*F->begin()) || !std::prev(I)->canFallThrough()) && - (TailDup.isSimpleBB(ChainBB) || (TBB->pred_size() == 1))) { - TII->removeBranch(*ChainBB); - ChainBB->removeSuccessor(TBB); - - // Update the CFG. - while (!TBB->pred_empty()) { - MachineBasicBlock *Pred = *(TBB->pred_end() - 1); - Pred->ReplaceUsesOfBlockWith(TBB, ChainBB); - } - - while (!TBB->succ_empty()) { - MachineBasicBlock *Succ = *(TBB->succ_end() - 1); - ChainBB->addSuccessor(Succ, MBPI->getEdgeProbability(TBB, Succ)); - TBB->removeSuccessor(Succ); - } - - // Move all the instructions of TBB to ChainBB. - ChainBB->splice(ChainBB->end(), TBB, TBB->begin(), TBB->end()); - EmptyBB.push_back(TBB); - - // If TBB was the target of a jump table, update jump tables to go to - // the ChainBB instead. - if (MachineJumpTableInfo *MJTI = F->getJumpTableInfo()) - MJTI->ReplaceMBBInJumpTables(TBB, ChainBB); - } } } } - - for (auto BB: EmptyBB) { - MLI->removeBlock(BB); - FunctionChain.remove(BB); - BlockToChain.erase(BB); - F->erase(BB); - } } void MachineBlockPlacement::alignBlocks() { @@ -3097,9 +3054,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } } - // optimizeBranches() may change the blocks, but we haven't updated the - // post-dominator tree. Because the post-dominator tree won't be used after - // this function and this pass don't preserve the post-dominator tree. 
optimizeBranches(); alignBlocks(); diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir index 54bd9b8e92393..9dc911f785b5e 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement.mir @@ -209,10 +209,14 @@ body: | BLR8 implicit $lr8, implicit $rm, implicit killed $x3 ; CHECK: bb.5.if.else.i: - ; CHECK-NEXT: renamable $x3 = LI8 1 - ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + ; CHECK: successors: %bb.11(0x80000000) + ; CHECK: B %bb.11 ; CHECK: bb.8.while.body.i (align 16): - ; CHECK: successors: %bb.5(0x04000000), %bb.9(0x7c000000) - ; CHECK: BCC 76, killed renamable $cr0, %bb.5 + ; CHECK: successors: %bb.11(0x04000000), %bb.9(0x7c000000) + ; CHECK: BCC 76, killed renamable $cr0, %bb.11 + + ; CHECK: bb.11: + ; CHECK: renamable $x3 = LI8 1 + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 ... From 58fd6b5b9c882ea32dc9673abaddc6dda564330e Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Fri, 4 Oct 2019 22:30:28 +0000 Subject: [PATCH 020/254] [libomptarget][nfc] Update remaining uint32 to use lanemask_t Summary: [libomptarget][nfc] Update remaining uint32 to use lanemask_t Update a few functions in the API to use lanemask_t instead of i32. NFC for nvptx. Also update the ActiveThreads type in DataSharingStateTy. This removes a lot of #ifdef from the downsteam amdgcn implementation. 
Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy Subscribers: openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D68513 llvm-svn: 373806 --- .../deviceRTLs/nvptx/src/data_sharing.cu | 8 ++++---- .../deviceRTLs/nvptx/src/interface.h | 16 ++++++++++------ openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu | 3 ++- .../deviceRTLs/nvptx/src/omptarget-nvptx.h | 2 +- .../deviceRTLs/nvptx/src/parallel.cu | 14 ++++++++------ openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu | 4 ++-- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index bd4cfec9f2e47..5e936b0161548 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -96,7 +96,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS, EXTERN void *__kmpc_data_sharing_environment_begin( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, size_t SharingDataSize, size_t SharingDefaultDataSize, int16_t IsOMPRuntimeInitialized) { @@ -117,7 +117,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID]; void *&StackP = DataSharingState.StackPtr[WID]; void * volatile &FrameP = DataSharingState.FramePtr[WID]; - int32_t &ActiveT = DataSharingState.ActiveThreads[WID]; + __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID]; DSPRINT0(DSFLAG, "Save current slot/stack values.\n"); // Save the current values. 
@@ -225,7 +225,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( EXTERN void __kmpc_data_sharing_environment_end( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, int32_t IsEntryPoint) { DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n"); @@ -260,7 +260,7 @@ EXTERN void __kmpc_data_sharing_environment_end( // assume that threads will converge right after the call site that started // the environment. if (IsWarpMasterActiveThread()) { - int32_t &ActiveT = DataSharingState.ActiveThreads[WID]; + __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID]; DSPRINT0(DSFLAG, "Before restoring the stack\n"); // Zero the bits in the mask. If it is still different from zero, then we diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h index ab57715592e52..4a84922e2441e 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -19,6 +19,7 @@ #define _INTERFACES_H_ #include "option.h" +#include "target_impl.h" //////////////////////////////////////////////////////////////////////////////// // OpenMP interface @@ -422,9 +423,9 @@ EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid, EXTERN void __kmpc_flush(kmp_Ident *loc); // vote -EXTERN int32_t __kmpc_warp_active_thread_mask(); +EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask(); // syncwarp -EXTERN void __kmpc_syncwarp(int32_t); +EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t); // tasks EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc, @@ -475,11 +476,13 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, EXTERN bool __kmpc_kernel_parallel(void **WorkFn, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_kernel_end_parallel(); -EXTERN bool 
__kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource); EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer); -EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource, int32_t *LaneId, int32_t *NumLanes); EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer); @@ -510,12 +513,13 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS, size_t InitialDataSize); EXTERN void *__kmpc_data_sharing_environment_begin( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, size_t SharingDataSize, size_t SharingDefaultDataSize, int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_data_sharing_environment_end( __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack, - void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint); + void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads, + int32_t IsEntryPoint); EXTERN void * __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu index f9a30152b7ea7..ee378460ac2da 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -380,7 +380,8 @@ public: //////////////////////////////////////////////////////////////////////////////// // Support for dispatch next - INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) { + INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val, + int leader) { uint32_t lo, hi; __kmpc_impl_unpack(val, 
lo, hi); hi = __kmpc_impl_shfl_sync(active, hi, leader); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 2299d24e035b8..70e6c286a187e 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -107,7 +107,7 @@ struct DataSharingStateTy { __kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number]; void *StackPtr[DS_Max_Warp_Number]; void * volatile FramePtr[DS_Max_Warp_Number]; - int32_t ActiveThreads[DS_Max_Warp_Number]; + __kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number]; }; // Additional worker slot type which is initialized with the default worker slot // size of 4*32 bytes. diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 24a235df52569..016ded8a543e8 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -44,13 +44,14 @@ typedef struct ConvergentSimdJob { //////////////////////////////////////////////////////////////////////////////// // support for convergent simd (team of threads in a warp only) //////////////////////////////////////////////////////////////////////////////// -EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource, int32_t *LaneId, int32_t *NumLanes) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n"); - uint32_t ConvergentMask = Mask; + __kmpc_impl_lanemask_t ConvergentMask = Mask; int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); - uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); + __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); *LaneSource += __kmpc_impl_ffs(WorkRemaining); *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; 
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt(); @@ -117,13 +118,14 @@ typedef struct ConvergentParallelJob { //////////////////////////////////////////////////////////////////////////////// // support for convergent parallelism (team of threads in a warp only) //////////////////////////////////////////////////////////////////////////////// -EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, +EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, + __kmpc_impl_lanemask_t Mask, bool *IsFinal, int32_t *LaneSource) { PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n"); - uint32_t ConvergentMask = Mask; + __kmpc_impl_lanemask_t ConvergentMask = Mask; int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask); - uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); + __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1); *LaneSource += __kmpc_impl_ffs(WorkRemaining); *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1; __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt(); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu index 343293e006f8a..28a541901c3d5 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu @@ -140,7 +140,7 @@ EXTERN void __kmpc_flush(kmp_Ident *loc) { // Vote //////////////////////////////////////////////////////////////////////////////// -EXTERN int32_t __kmpc_warp_active_thread_mask() { +EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() { PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n"); return __kmpc_impl_activemask(); } @@ -149,7 +149,7 @@ EXTERN int32_t __kmpc_warp_active_thread_mask() { // Syncwarp //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_syncwarp(int32_t Mask) { +EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) { PRINT0(LD_IO, "call 
__kmpc_syncwarp\n"); __kmpc_impl_syncwarp(Mask); } From 6a2673605e50d42d2326e51e29a0d78361c7a570 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Fri, 4 Oct 2019 22:46:42 +0000 Subject: [PATCH 021/254] Invalidate assumption cache before outlining. Subscribers: llvm-commits Tags: #llvm Reviewers: compnerd, vsk, sebpop, fhahn, tejohnson Reviewed by: vsk Differential Revision: https://reviews.llvm.org/D68478 llvm-svn: 373807 --- .../llvm/Transforms/Utils/CodeExtractor.h | 5 ++++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 5 ---- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 28 ++++++++++++++----- .../assumption-cache-invalidation.ll | 12 ++++++++ 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 8ff5172339326..74584bce910a7 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -106,6 +106,11 @@ class Value; /// returns false. Function *extractCodeRegion(); + /// Verify that assumption cache isn't stale after a region is extracted. + /// Returns false when verifier finds errors. AssumptionCache is passed as + /// parameter to make this function stateless. + static bool verifyAssumptionCache(const Function& F, AssumptionCache *AC); + /// Test whether this code extractor is eligible. /// /// Based on the blocks used when constructing the code extractor, diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 31571c4a20a87..bd641da37f552 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -628,11 +628,6 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { } while (!Region.empty()); } - // We need to explicitly clear the assumption cache since the value tracking - // may now be invalid as part of the function has changed. 
- if (Changed) - if (AssumptionCache *AC = LookupAC(F)) - AC->clear(); return Changed; } diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index fcacc3a68002c..1fe520b161020 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1301,13 +1301,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { // Insert this basic block into the new function newBlocks.push_back(Block); - - // Remove @llvm.assume calls that were moved to the new function from the - // old function's assumption cache. - if (AC) - for (auto &I : *Block) - if (match(&I, m_Intrinsic())) - AC->unregisterAssumption(cast(&I)); } } @@ -1378,6 +1371,15 @@ Function *CodeExtractor::extractCodeRegion() { } } + if (AC) { + // Remove @llvm.assume calls that were moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) + for (auto &I : *Block) + if (match(&I, m_Intrinsic())) + AC->unregisterAssumption(cast(&I)); + } + // If we have any return instructions in the region, split those blocks so // that the return is not in the region. 
splitReturnBlocks(); @@ -1568,5 +1570,17 @@ Function *CodeExtractor::extractCodeRegion() { }); LLVM_DEBUG(if (verifyFunction(*oldFunction)) report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; } + +bool CodeExtractor::verifyAssumptionCache(const Function& F, + AssumptionCache *AC) { + for (auto AssumeVH : AC->assumptions()) { + CallInst *I = cast(AssumeVH); + if (I->getFunction() != &F) + return true; + } + return false; +} diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll index 811b50783a5c5..fbf2061ff650a 100644 --- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll +++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll @@ -1,3 +1,5 @@ +; REQUIRES: asserts +; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s ; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -6,6 +8,16 @@ target triple = "aarch64" %a = type { i64, i64 } %b = type { i64 } +; CHECK: @f +; CHECK-LABEL: codeRepl: +; CHECK-NOT: @llvm.assume +; CHECK: } +; CHECK: declare {{.*}}@llvm.assume +; CHECK: define {{.*}}@f.cold.1(i64 %0) +; CHECK-LABEL: newFuncRoot: +; CHECK: %1 = icmp eq i64 %0, 0 +; CHECK: call void @llvm.assume(i1 %1) + define void @f() { entry: %0 = getelementptr inbounds %a, %a* null, i64 0, i32 1 From fadc84ae9a959d9d5b0642fce4dff7a1f32d63ad Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 4 Oct 2019 22:50:23 +0000 Subject: [PATCH 022/254] [libc++] Localize common build flags into a single CMake function Also, set those flags for the cxx_experimental target. 
Otherwise, cxx_experimental doesn't build properly when neither the static nor the shared library is compiled (yes, that is a weird setup). llvm-svn: 373808 --- libcxx/CMakeLists.txt | 11 +++++++++++ libcxx/src/CMakeLists.txt | 30 ++++++++++-------------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 335e7bc61a269..1e6227bf01375 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -831,6 +831,17 @@ function(cxx_add_config_site target) endif() endfunction() +# Setup all common build flags ================================================= +function(cxx_add_common_build_flags target) + cxx_add_basic_build_flags(${target}) + cxx_add_warning_flags(${target}) + cxx_add_windows_flags(${target}) + cxx_add_config_site(${target}) + cxx_add_exception_flags(${target}) + cxx_add_rtti_flags(${target}) + cxx_add_module_flags(${target}) +endfunction() + #=============================================================================== # Setup Source Code And Tests #=============================================================================== diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index d0a510861a1a1..1b18850463773 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -230,14 +230,8 @@ if (LIBCXX_ENABLE_SHARED) SOVERSION "${LIBCXX_ABI_VERSION}" DEFINE_SYMBOL "" ) - cxx_add_basic_build_flags(cxx_shared) + cxx_add_common_build_flags(cxx_shared) cxx_set_common_defines(cxx_shared) - cxx_add_warning_flags(cxx_shared) - cxx_add_windows_flags(cxx_shared) - cxx_add_config_site(cxx_shared) - cxx_add_exception_flags(cxx_shared) - cxx_add_rtti_flags(cxx_shared) - cxx_add_module_flags(cxx_shared) # Link against LLVM libunwind if (LIBCXXABI_USE_LLVM_UNWINDER) @@ -337,14 +331,8 @@ if (LIBCXX_ENABLE_STATIC) LINK_FLAGS "${LIBCXX_LINK_FLAGS}" OUTPUT_NAME "c++" ) - cxx_add_basic_build_flags(cxx_static) + cxx_add_common_build_flags(cxx_static) 
cxx_set_common_defines(cxx_static) - cxx_add_warning_flags(cxx_static) - cxx_add_windows_flags(cxx_static) - cxx_add_config_site(cxx_static) - cxx_add_exception_flags(cxx_static) - cxx_add_rtti_flags(cxx_static) - cxx_add_module_flags(cxx_static) if (LIBCXX_HERMETIC_STATIC_LIBRARY) # If the hermetic library doesn't define the operator new/delete functions @@ -402,17 +390,19 @@ if (LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY) target_link_libraries(cxx_experimental cxx_static) endif() - set(experimental_flags "${LIBCXX_COMPILE_FLAGS}") - check_flag_supported(-std=c++14) - if (NOT MSVC AND LIBCXX_SUPPORTS_STD_EQ_CXX14_FLAG) - string(REPLACE "-std=c++11" "-std=c++14" experimental_flags "${LIBCXX_COMPILE_FLAGS}") - endif() set_target_properties(cxx_experimental PROPERTIES - COMPILE_FLAGS "${experimental_flags}" + COMPILE_FLAGS "${LIBCXX_COMPILE_FLAGS}" OUTPUT_NAME "c++experimental" ) endif() +cxx_add_common_build_flags(cxx_experimental) + +# Overwrite the previously-set Standard flag with -std=c++14 if supported +check_flag_supported(-std=c++14) +if (NOT MSVC AND LIBCXX_SUPPORTS_STD_EQ_CXX14_FLAG) + target_compile_options(cxx_experimental PRIVATE "-std=c++14") +endif() if (LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) file(GLOB LIBCXX_EXTERNAL_THREADING_SUPPORT_SOURCES ../test/support/external_threads.cpp) From 9627b26c40a19ce904921148f7be4cb84fd9e796 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 4 Oct 2019 23:08:20 +0000 Subject: [PATCH 023/254] [libc++] Guard cxx_experimental settings behind LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY If you explicitly set LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY to OFF, your project will fail to configure because the cxx_experimental target doesn't exist. 
llvm-svn: 373809 --- libcxx/src/CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 1b18850463773..03bc126be5672 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -395,14 +395,14 @@ if (LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY) COMPILE_FLAGS "${LIBCXX_COMPILE_FLAGS}" OUTPUT_NAME "c++experimental" ) + cxx_add_common_build_flags(cxx_experimental) + # Overwrite the previously-set Standard flag with -std=c++14 if supported + check_flag_supported(-std=c++14) + if (NOT MSVC AND LIBCXX_SUPPORTS_STD_EQ_CXX14_FLAG) + target_compile_options(cxx_experimental PRIVATE "-std=c++14") + endif() endif() -cxx_add_common_build_flags(cxx_experimental) -# Overwrite the previously-set Standard flag with -std=c++14 if supported -check_flag_supported(-std=c++14) -if (NOT MSVC AND LIBCXX_SUPPORTS_STD_EQ_CXX14_FLAG) - target_compile_options(cxx_experimental PRIVATE "-std=c++14") -endif() if (LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) file(GLOB LIBCXX_EXTERNAL_THREADING_SUPPORT_SOURCES ../test/support/external_threads.cpp) From 4e5d9e120be36a4518c85383d5231db8416c2d96 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 4 Oct 2019 23:09:55 +0000 Subject: [PATCH 024/254] [MachO] Reduce indentation further in ParseSymtab (NFC) llvm-svn: 373810 --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 353 +++++++++--------- 1 file changed, 177 insertions(+), 176 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 81b7397a87658..15f221e58fda9 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2038,8 +2038,9 @@ UUID ObjectFileMachO::GetSharedCacheUUID(FileSpec dyld_shared_cache, return dsc_uuid; } -bool ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, - size_t nlist_byte_size, 
struct nlist_64 &nlist) { +static bool ParseNList(DataExtractor &nlist_data, + lldb::offset_t &nlist_data_offset, + size_t nlist_byte_size, struct nlist_64 &nlist) { if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) return false; nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); @@ -4268,205 +4269,205 @@ size_t ObjectFileMachO::ParseSymtab() { } } - if (add_nlist) { - uint64_t symbol_value = nlist.n_value; + if (!add_nlist) { + sym[sym_idx].Clear(); + continue; + } - if (symbol_name_non_abi_mangled) { - sym[sym_idx].GetMangled().SetMangledName( - ConstString(symbol_name_non_abi_mangled)); - sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); - } else { - bool symbol_name_is_mangled = false; + uint64_t symbol_value = nlist.n_value; - if (symbol_name && symbol_name[0] == '_') { - symbol_name_is_mangled = symbol_name[1] == '_'; - symbol_name++; // Skip the leading underscore - } + if (symbol_name_non_abi_mangled) { + sym[sym_idx].GetMangled().SetMangledName( + ConstString(symbol_name_non_abi_mangled)); + sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); + } else { + bool symbol_name_is_mangled = false; - if (symbol_name) { - ConstString const_symbol_name(symbol_name); - sym[sym_idx].GetMangled().SetValue(const_symbol_name, - symbol_name_is_mangled); - } + if (symbol_name && symbol_name[0] == '_') { + symbol_name_is_mangled = symbol_name[1] == '_'; + symbol_name++; // Skip the leading underscore } - if (is_gsym) { - const char *gsym_name = - sym[sym_idx] - .GetMangled() - .GetName(lldb::eLanguageTypeUnknown, Mangled::ePreferMangled) - .GetCString(); - if (gsym_name) - N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; + if (symbol_name) { + ConstString const_symbol_name(symbol_name); + sym[sym_idx].GetMangled().SetValue(const_symbol_name, + symbol_name_is_mangled); } + } - if (symbol_section) { - const addr_t section_file_addr = symbol_section->GetFileAddress(); - if (symbol_byte_size == 0 && 
function_starts_count > 0) { - addr_t symbol_lookup_file_addr = nlist.n_value; - // Do an exact address match for non-ARM addresses, else get the - // closest since the symbol might be a thumb symbol which has an - // address with bit zero set - FunctionStarts::Entry *func_start_entry = - function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); - if (is_arm && func_start_entry) { - // Verify that the function start address is the symbol address - // (ARM) or the symbol address + 1 (thumb) - if (func_start_entry->addr != symbol_lookup_file_addr && - func_start_entry->addr != (symbol_lookup_file_addr + 1)) { - // Not the right entry, NULL it out... - func_start_entry = nullptr; - } - } - if (func_start_entry) { - func_start_entry->data = true; + if (is_gsym) { + const char *gsym_name = + sym[sym_idx] + .GetMangled() + .GetName(lldb::eLanguageTypeUnknown, Mangled::ePreferMangled) + .GetCString(); + if (gsym_name) + N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; + } - addr_t symbol_file_addr = func_start_entry->addr; + if (symbol_section) { + const addr_t section_file_addr = symbol_section->GetFileAddress(); + if (symbol_byte_size == 0 && function_starts_count > 0) { + addr_t symbol_lookup_file_addr = nlist.n_value; + // Do an exact address match for non-ARM addresses, else get the + // closest since the symbol might be a thumb symbol which has an + // address with bit zero set. + FunctionStarts::Entry *func_start_entry = + function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); + if (is_arm && func_start_entry) { + // Verify that the function start address is the symbol address + // (ARM) or the symbol address + 1 (thumb). + if (func_start_entry->addr != symbol_lookup_file_addr && + func_start_entry->addr != (symbol_lookup_file_addr + 1)) { + // Not the right entry, NULL it out... 
+ func_start_entry = nullptr; + } + } + if (func_start_entry) { + func_start_entry->data = true; + + addr_t symbol_file_addr = func_start_entry->addr; + if (is_arm) + symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; + + const FunctionStarts::Entry *next_func_start_entry = + function_starts.FindNextEntry(func_start_entry); + const addr_t section_end_file_addr = + section_file_addr + symbol_section->GetByteSize(); + if (next_func_start_entry) { + addr_t next_symbol_file_addr = next_func_start_entry->addr; + // Be sure the clear the Thumb address bit when we calculate the + // size from the current and next address if (is_arm) - symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - - const FunctionStarts::Entry *next_func_start_entry = - function_starts.FindNextEntry(func_start_entry); - const addr_t section_end_file_addr = - section_file_addr + symbol_section->GetByteSize(); - if (next_func_start_entry) { - addr_t next_symbol_file_addr = next_func_start_entry->addr; - // Be sure the clear the Thumb address bit when we calculate - // the size from the current and next address - if (is_arm) - next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - symbol_byte_size = std::min( - next_symbol_file_addr - symbol_file_addr, - section_end_file_addr - symbol_file_addr); - } else { - symbol_byte_size = section_end_file_addr - symbol_file_addr; - } + next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; + symbol_byte_size = std::min( + next_symbol_file_addr - symbol_file_addr, + section_end_file_addr - symbol_file_addr); + } else { + symbol_byte_size = section_end_file_addr - symbol_file_addr; } } - symbol_value -= section_file_addr; } + symbol_value -= section_file_addr; + } - if (!is_debug) { - if (type == eSymbolTypeCode) { - // See if we can find a N_FUN entry for any code symbols. 
If we - // do find a match, and the name matches, then we can merge the - // two into just the function symbol to avoid duplicate entries - // in the symbol table - std::pair - range; - range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); - if (range.first != range.second) { - bool found_it = false; - for (ValueToSymbolIndexMap::const_iterator pos = range.first; - pos != range.second; ++pos) { - if (sym[sym_idx].GetMangled().GetName( - lldb::eLanguageTypeUnknown, Mangled::ePreferMangled) == - sym[pos->second].GetMangled().GetName( - lldb::eLanguageTypeUnknown, Mangled::ePreferMangled)) { - m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; - // We just need the flags from the linker symbol, so put - // these flags into the N_FUN flags to avoid duplicate - // symbols in the symbol table - sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); - sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); - if (resolver_addresses.find(nlist.n_value) != - resolver_addresses.end()) - sym[pos->second].SetType(eSymbolTypeResolver); - sym[sym_idx].Clear(); - found_it = true; - break; - } + if (!is_debug) { + if (type == eSymbolTypeCode) { + // See if we can find a N_FUN entry for any code symbols. If we do + // find a match, and the name matches, then we can merge the two into + // just the function symbol to avoid duplicate entries in the symbol + // table. 
+ std::pair + range; + range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); + if (range.first != range.second) { + bool found_it = false; + for (ValueToSymbolIndexMap::const_iterator pos = range.first; + pos != range.second; ++pos) { + if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, + Mangled::ePreferMangled) == + sym[pos->second].GetMangled().GetName( + lldb::eLanguageTypeUnknown, Mangled::ePreferMangled)) { + m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; + // We just need the flags from the linker symbol, so put these + // flags into the N_FUN flags to avoid duplicate symbols in the + // symbol table. + sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); + sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); + if (resolver_addresses.find(nlist.n_value) != + resolver_addresses.end()) + sym[pos->second].SetType(eSymbolTypeResolver); + sym[sym_idx].Clear(); + found_it = true; + break; } - if (found_it) - continue; - } else { - if (resolver_addresses.find(nlist.n_value) != - resolver_addresses.end()) - type = eSymbolTypeResolver; } - } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || - type == eSymbolTypeObjCMetaClass || - type == eSymbolTypeObjCIVar) { - // See if we can find a N_STSYM entry for any data symbols. 
If we - // do find a match, and the name matches, then we can merge the - // two into just the Static symbol to avoid duplicate entries in - // the symbol table - std::pair - range; - range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); - if (range.first != range.second) { - bool found_it = false; - for (ValueToSymbolIndexMap::const_iterator pos = range.first; - pos != range.second; ++pos) { - if (sym[sym_idx].GetMangled().GetName( - lldb::eLanguageTypeUnknown, Mangled::ePreferMangled) == - sym[pos->second].GetMangled().GetName( - lldb::eLanguageTypeUnknown, Mangled::ePreferMangled)) { - m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; - // We just need the flags from the linker symbol, so put - // these flags into the N_STSYM flags to avoid duplicate - // symbols in the symbol table - sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); - sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); - sym[sym_idx].Clear(); - found_it = true; - break; - } + if (found_it) + continue; + } else { + if (resolver_addresses.find(nlist.n_value) != + resolver_addresses.end()) + type = eSymbolTypeResolver; + } + } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass || + type == eSymbolTypeObjCMetaClass || + type == eSymbolTypeObjCIVar) { + // See if we can find a N_STSYM entry for any data symbols. If we do + // find a match, and the name matches, then we can merge the two into + // just the Static symbol to avoid duplicate entries in the symbol + // table. 
+ std::pair + range; + range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); + if (range.first != range.second) { + bool found_it = false; + for (ValueToSymbolIndexMap::const_iterator pos = range.first; + pos != range.second; ++pos) { + if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, + Mangled::ePreferMangled) == + sym[pos->second].GetMangled().GetName( + lldb::eLanguageTypeUnknown, Mangled::ePreferMangled)) { + m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; + // We just need the flags from the linker symbol, so put these + // flags into the N_STSYM flags to avoid duplicate symbols in + // the symbol table. + sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); + sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); + sym[sym_idx].Clear(); + found_it = true; + break; } - if (found_it) + } + if (found_it) + continue; + } else { + // Combine N_GSYM stab entries with the non stab symbol. + const char *gsym_name = sym[sym_idx] + .GetMangled() + .GetName(lldb::eLanguageTypeUnknown, + Mangled::ePreferMangled) + .GetCString(); + if (gsym_name) { + ConstNameToSymbolIndexMap::const_iterator pos = + N_GSYM_name_to_sym_idx.find(gsym_name); + if (pos != N_GSYM_name_to_sym_idx.end()) { + const uint32_t GSYM_sym_idx = pos->second; + m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; + // Copy the address, because often the N_GSYM address has an + // invalid address of zero when the global is a common symbol. + sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); + sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); + // We just need the flags from the linker symbol, so put these + // flags into the N_GSYM flags to avoid duplicate symbols in + // the symbol table. 
+ sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); + sym[sym_idx].Clear(); continue; - } else { - // Combine N_GSYM stab entries with the non stab symbol - const char *gsym_name = sym[sym_idx] - .GetMangled() - .GetName(lldb::eLanguageTypeUnknown, - Mangled::ePreferMangled) - .GetCString(); - if (gsym_name) { - ConstNameToSymbolIndexMap::const_iterator pos = - N_GSYM_name_to_sym_idx.find(gsym_name); - if (pos != N_GSYM_name_to_sym_idx.end()) { - const uint32_t GSYM_sym_idx = pos->second; - m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; - // Copy the address, because often the N_GSYM address has - // an invalid address of zero when the global is a common - // symbol - sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); - sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); - // We just need the flags from the linker symbol, so put - // these flags into the N_GSYM flags to avoid duplicate - // symbols in the symbol table - sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); - sym[sym_idx].Clear(); - continue; - } } } } } + } - sym[sym_idx].SetID(nlist_idx); - sym[sym_idx].SetType(type); - if (set_value) { - sym[sym_idx].GetAddressRef().SetSection(symbol_section); - sym[sym_idx].GetAddressRef().SetOffset(symbol_value); - } - sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); - if (nlist.n_desc & N_WEAK_REF) - sym[sym_idx].SetIsWeak(true); + sym[sym_idx].SetID(nlist_idx); + sym[sym_idx].SetType(type); + if (set_value) { + sym[sym_idx].GetAddressRef().SetSection(symbol_section); + sym[sym_idx].GetAddressRef().SetOffset(symbol_value); + } + sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); + if (nlist.n_desc & N_WEAK_REF) + sym[sym_idx].SetIsWeak(true); - if (symbol_byte_size > 0) - sym[sym_idx].SetByteSize(symbol_byte_size); + if (symbol_byte_size > 0) + sym[sym_idx].SetByteSize(symbol_byte_size); - if (demangled_is_synthesized) - sym[sym_idx].SetDemangledNameIsSynthesized(true); + if (demangled_is_synthesized) 
+ sym[sym_idx].SetDemangledNameIsSynthesized(true); - ++sym_idx; - } else { - sym[sym_idx].Clear(); - } + ++sym_idx; } for (const auto &pos : reexport_shlib_needs_fixup) { From 50afaa9d34d6447b04286335d9e85bd70637ecff Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Fri, 4 Oct 2019 23:36:59 +0000 Subject: [PATCH 025/254] Add a unittest to verify for assumption cache Reviewers: vsk, tejohnson Reviewed By: vsk Differential Revision: https://reviews.llvm.org/D68095 llvm-svn: 373811 --- .../Transforms/Utils/CodeExtractorTest.cpp | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 8b86951fa5e19..9213be726970c 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/AsmParser/Parser.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" @@ -225,4 +226,55 @@ TEST(CodeExtractor, StoreOutputInvokeResultInExitStub) { EXPECT_FALSE(verifyFunction(*Func)); } +TEST(CodeExtractor, ExtractAndInvalidateAssumptionCache) { + LLVMContext Ctx; + SMDiagnostic Err; + std::unique_ptr M(parseAssemblyString(R"ir( + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + %b = type { i64 } + declare void @g(i8*) + + declare void @llvm.assume(i1) #0 + + define void @test() { + entry: + br label %label + + label: + %0 = load %b*, %b** inttoptr (i64 8 to %b**), align 8 + %1 = getelementptr inbounds %b, %b* %0, i64 undef, i32 0 + %2 = load i64, i64* %1, align 8 + %3 = icmp ugt i64 %2, 1 + br i1 %3, label %if.then, label %if.else + + if.then: + unreachable + + if.else: + call void @g(i8* undef) + store i64 undef, i64* null, align 536870912 + %4 = icmp eq i64 %2, 0 + call void 
@llvm.assume(i1 %4) + unreachable + } + + attributes #0 = { nounwind willreturn } + )ir", + Err, Ctx)); + + assert(M && "Could not parse module?"); + Function *Func = M->getFunction("test"); + SmallVector Blocks{ getBlockByName(Func, "if.else") }; + AssumptionCache AC(*Func); + CodeExtractor CE(Blocks, nullptr, false, nullptr, nullptr, &AC); + EXPECT_TRUE(CE.isEligible()); + + Function *Outlined = CE.extractCodeRegion(); + EXPECT_TRUE(Outlined); + EXPECT_FALSE(verifyFunction(*Outlined)); + EXPECT_FALSE(verifyFunction(*Func)); + EXPECT_FALSE(CE.verifyAssumptionCache(*Func, &AC)); +} } // end anonymous namespace From ea835f5ce84124f74744ff394d11202ab46847d6 Mon Sep 17 00:00:00 2001 From: Ana Pazos Date: Fri, 4 Oct 2019 23:42:07 +0000 Subject: [PATCH 026/254] [RISCV] Added missing ImmLeaf predicates simm9_lsb0 and simm12_lsb0 operand types were missing predicates. llvm-svn: 373812 --- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index a8809a8fbad6b..fa0050f107b29 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -137,7 +137,8 @@ def uimm8_lsb000 : Operand, } // A 9-bit signed immediate where the least significant bit is zero. -def simm9_lsb0 : Operand { +def simm9_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; @@ -196,7 +197,8 @@ def simm10_lsb0000nonzero : Operand, } // A 12-bit signed immediate where the least significant bit is zero. 
-def simm12_lsb0 : Operand { +def simm12_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; From 9fe5d730c7070fa64c292699265c26f24c96003e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 4 Oct 2019 23:46:26 +0000 Subject: [PATCH 027/254] [Test] Add a test case fo a missed oppurtunity in implicit null checking llvm-svn: 373813 --- llvm/test/CodeGen/X86/implicit-null-check.ll | 68 ++++++++++++++++++-- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll index 5b0790f699aac..6d6b31f86dbe9 100644 --- a/llvm/test/CodeGen/X86/implicit-null-check.ll +++ b/llvm/test/CodeGen/X86/implicit-null-check.ll @@ -432,14 +432,70 @@ define i32 @imp_null_check_gep_load_with_use_dep(i32* %x, i32 %a) { ret i32 %z } +;; TODO: We could handle this case as we can lift the fence into the +;; previous block before the conditional without changing behavior. +define i32 @imp_null_check_load_fence1(i32* %x) { +; CHECK-LABEL: imp_null_check_load_fence1: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB16_1 +; CHECK-NEXT: ## %bb.2: ## %not_null +; CHECK-NEXT: ##MEMBARRIER +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB16_1: ## %is_null +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq + +entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + +is_null: + ret i32 42 + +not_null: + fence acquire + %t = load i32, i32* %x + ret i32 %t +} + +;; TODO: We could handle this case as we can lift the fence into the +;; previous block before the conditional without changing behavior. 
+define i32 @imp_null_check_load_fence2(i32* %x) { +; CHECK-LABEL: imp_null_check_load_fence2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB17_1 +; CHECK-NEXT: ## %bb.2: ## %not_null +; CHECK-NEXT: mfence +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB17_1: ## %is_null +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq + +entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + +is_null: + ret i32 42 + +not_null: + fence seq_cst + %t = load i32, i32* %x + ret i32 %t +} + define void @imp_null_check_store(i32* %x) { ; CHECK-LABEL: imp_null_check_store: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: Ltmp14: -; CHECK-NEXT: movl $1, (%rdi) ## on-fault: LBB16_1 +; CHECK-NEXT: movl $1, (%rdi) ## on-fault: LBB18_1 ; CHECK-NEXT: ## %bb.2: ## %not_null ; CHECK-NEXT: retq -; CHECK-NEXT: LBB16_1: ## %is_null +; CHECK-NEXT: LBB18_1: ## %is_null ; CHECK-NEXT: retq entry: @@ -459,10 +515,10 @@ define void @imp_null_check_unordered_store(i32* %x) { ; CHECK-LABEL: imp_null_check_unordered_store: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: Ltmp15: -; CHECK-NEXT: movl $1, (%rdi) ## on-fault: LBB17_1 +; CHECK-NEXT: movl $1, (%rdi) ## on-fault: LBB19_1 ; CHECK-NEXT: ## %bb.2: ## %not_null ; CHECK-NEXT: retq -; CHECK-NEXT: LBB17_1: ## %is_null +; CHECK-NEXT: LBB19_1: ## %is_null ; CHECK-NEXT: retq entry: @@ -481,10 +537,10 @@ define i32 @imp_null_check_neg_gep_load(i32* %x) { ; CHECK-LABEL: imp_null_check_neg_gep_load: ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: Ltmp16: -; CHECK-NEXT: movl -128(%rdi), %eax ## on-fault: LBB18_1 +; CHECK-NEXT: movl -128(%rdi), %eax ## on-fault: LBB20_1 ; CHECK-NEXT: ## %bb.2: ## %not_null ; CHECK-NEXT: retq -; CHECK-NEXT: LBB18_1: ## %is_null +; CHECK-NEXT: LBB20_1: ## %is_null ; CHECK-NEXT: movl $42, %eax ; CHECK-NEXT: retq From d5a4dad2061c09b01f396b3958ccccc4f9727b1a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sat, 5 Oct 2019 00:32:10 +0000 
Subject: [PATCH 028/254] Fix a *nasty* miscompile in experimental unordered atomic lowering This is an omission in rL371441. Loads which happened to be unordered weren't being added to the PendingLoad set, and thus weren't be ordered w/respect to side effects which followed before the end of the block. Included test case is how I spotted this. We had an atomic load being folded into a using instruction after a fence that load was supposed to be ordered with. I'm sure it showed up a bunch of other ways as well. Spotted via manual inspecting of assembly differences in a corpus w/and w/o the new experimental mode. Finding this with testing would have been "unpleasant". llvm-svn: 373814 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 7 +- llvm/test/CodeGen/X86/atomic-unordered.ll | 78 ++++++++++++++----- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c6587188bc005..31cecc01d9d3f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4672,10 +4672,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { L = DAG.getPtrExtOrTrunc(L, dl, VT); setValue(&I, L); - if (!I.isUnordered()) { - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) DAG.setRoot(OutChain); - } + else + PendingLoads.push_back(OutChain); return; } diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 4d17c0584b951..35055a5adca8f 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2315,22 +2315,11 @@ define i64 @constant_folding(i64* %p) { ; Legal to forward and fold (TODO) define i64 @load_forwarding(i64* %p) { -; CHECK-O0-LABEL: load_forwarding: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: orq (%rdi), %rax -; 
CHECK-O0-NEXT: retq -; -; CHECK-O3-CUR-LABEL: load_forwarding: -; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: orq (%rdi), %rax -; CHECK-O3-CUR-NEXT: retq -; -; CHECK-O3-EX-LABEL: load_forwarding: -; CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movq (%rdi), %rax -; CHECK-O3-EX-NEXT: retq +; CHECK-LABEL: load_forwarding: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: orq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %v2 = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 @@ -2459,8 +2448,8 @@ define i64 @fold_constant_clobber(i64* %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_constant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p @@ -2486,8 +2475,8 @@ define i64 @fold_constant_fence(i64 %arg) { ; CHECK-O3-EX-LABEL: fold_constant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rdi, %rax -; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst @@ -2513,8 +2502,8 @@ define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_invariant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: addq (%rdi), %rax +; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p @@ -2541,8 +2530,8 @@ define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O3-EX-LABEL: fold_invariant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax -; CHECK-O3-EX-NEXT: 
mfence ; CHECK-O3-EX-NEXT: addq (%rdi), %rax +; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst @@ -2713,3 +2702,52 @@ define i16 @load_combine(i8* %p) { %res = or i16 %v1.ext, %v2.sht ret i16 %res } + +define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) { +; CHECK-O0-LABEL: fold_cmp_over_fence: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl (%rdi), %eax +; CHECK-O0-NEXT: mfence +; CHECK-O0-NEXT: cmpl %eax, %esi +; CHECK-O0-NEXT: jne .LBB116_2 +; CHECK-O0-NEXT: # %bb.1: # %taken +; CHECK-O0-NEXT: movb $1, %al +; CHECK-O0-NEXT: retq +; CHECK-O0-NEXT: .LBB116_2: # %untaken +; CHECK-O0-NEXT: xorl %eax, %eax +; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-CUR-LABEL: fold_cmp_over_fence: +; CHECK-O3-CUR: # %bb.0: +; CHECK-O3-CUR-NEXT: movl (%rdi), %eax +; CHECK-O3-CUR-NEXT: mfence +; CHECK-O3-CUR-NEXT: cmpl %eax, %esi +; CHECK-O3-CUR-NEXT: jne .LBB116_2 +; CHECK-O3-CUR-NEXT: # %bb.1: # %taken +; CHECK-O3-CUR-NEXT: movb $1, %al +; CHECK-O3-CUR-NEXT: retq +; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken +; CHECK-O3-CUR-NEXT: xorl %eax, %eax +; CHECK-O3-CUR-NEXT: retq +; +; CHECK-O3-EX-LABEL: fold_cmp_over_fence: +; CHECK-O3-EX: # %bb.0: +; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi +; CHECK-O3-EX-NEXT: mfence +; CHECK-O3-EX-NEXT: jne .LBB116_2 +; CHECK-O3-EX-NEXT: # %bb.1: # %taken +; CHECK-O3-EX-NEXT: movb $1, %al +; CHECK-O3-EX-NEXT: retq +; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken +; CHECK-O3-EX-NEXT: xorl %eax, %eax +; CHECK-O3-EX-NEXT: retq + %v2 = load atomic i32, i32* %p unordered, align 4 + fence seq_cst + %cmp = icmp eq i32 %v1, %v2 + br i1 %cmp, label %taken, label %untaken +taken: + ret i1 true +untaken: + ret i1 false +} From 482f4d9aa9d1a4d991e6fd1fdd233f07bb69791f Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 5 Oct 2019 01:37:04 +0000 Subject: [PATCH 029/254] Expose ProvidePositionalOption as a public API The motivation is to reuse the 
key value parsing logic here to parse instance specific pass options within the context of MLIR. The primary functionality exposed is the "," splitting for arrays and the logic for properly handling duplicate definitions of a single flag. Patch by: Parker Schuh Differential Revision: https://reviews.llvm.org/D68294 llvm-svn: 373815 --- llvm/include/llvm/Support/CommandLine.h | 3 +++ llvm/lib/Support/CommandLine.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 3cc2c3c0121b2..63784463e1718 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -2000,6 +2000,9 @@ void ResetAllOptionOccurrences(); /// where no options are supported. void ResetCommandLineParser(); +/// Parses `Arg` into the option handler `Handler`. +bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i); + } // end namespace cl } // end namespace llvm diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 25510fa58ff54..620f7ffd4c9fa 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -692,7 +692,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, return false; } -static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { +bool llvm::cl::ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy); } From 559265c8daf7230d77b02f8566a6539e81edf416 Mon Sep 17 00:00:00 2001 From: David Bolvansky Date: Sat, 5 Oct 2019 08:02:11 +0000 Subject: [PATCH 030/254] [Diagnostics] Use Expr::isKnownToHaveBooleanValue() to check bitwise negation of bool in languages without a bool type Thanks for this advice, Richard Trieu! 
llvm-svn: 373817 --- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/Sema/warn-bitwise-negation-bool.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 667441cbaa5f2..c18f54cefde69 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13479,7 +13479,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, // C99 does not support '~' for complex conjugation. Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); - else if (Input.get()->IgnoreParenImpCasts()->getType()->isBooleanType()) + else if (Input.get()->isKnownToHaveBooleanValue()) Diag(OpLoc, diag::warn_bitwise_negation_bool) << FixItHint::CreateReplacement(OpLoc, "!"); else if (resultType->hasIntegerRepresentation()) diff --git a/clang/test/Sema/warn-bitwise-negation-bool.c b/clang/test/Sema/warn-bitwise-negation-bool.c index dfec00055cd53..435d783439c69 100644 --- a/clang/test/Sema/warn-bitwise-negation-bool.c +++ b/clang/test/Sema/warn-bitwise-negation-bool.c @@ -19,4 +19,6 @@ void test(boolean b, int i) { b = ~i; i = ~b; // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" + b = ~(i > 4); // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" 
} From 3acc649b86fcbd3cdd88e70cd85f0ef8479b83c5 Mon Sep 17 00:00:00 2001 From: David Bolvansky Date: Sat, 5 Oct 2019 08:09:06 +0000 Subject: [PATCH 031/254] [NFCI] Slightly improve warning message llvm-svn: 373818 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/test/SemaCXX/warn-xor-as-pow.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9aa2792d855a9..c54380639a434 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3402,7 +3402,7 @@ def warn_address_of_reference_bool_conversion : Warning< InGroup; def warn_xor_used_as_pow : Warning< - "result of '%0' is %1; did you mean an exponentiation?">, + "result of '%0' is %1; did you mean exponentiation?">, InGroup; def warn_xor_used_as_pow_base_extra : Warning< "result of '%0' is %1; did you mean '%2' (%3)?">, diff --git a/clang/test/SemaCXX/warn-xor-as-pow.cpp b/clang/test/SemaCXX/warn-xor-as-pow.cpp index e024c288d7f99..123d0ac5e0312 100644 --- a/clang/test/SemaCXX/warn-xor-as-pow.cpp +++ b/clang/test/SemaCXX/warn-xor-as-pow.cpp @@ -79,7 +79,7 @@ void test(unsigned a, unsigned b) { res = 2 ^ 32; // expected-warning {{result of '2 ^ 32' is 34; did you mean '1LL << 32'?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:9-[[@LINE-1]]:15}:"1LL << 32" // expected-note@-2 {{replace expression with '0x2 ^ 32' or use 'xor' instead of '^' to silence this warning}} - res = 2 ^ 64; // expected-warning {{result of '2 ^ 64' is 66; did you mean an exponentiation?}} + res = 2 ^ 64; // expected-warning {{result of '2 ^ 64' is 66; did you mean exponentiation?}} // expected-note@-1 {{replace expression with '0x2 ^ 64' or use 'xor' instead of '^' to silence this warning}} res = 2 ^ 65; From 68f58a75a76b17affeff90e4e250a1e790fda0ac Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 5 Oct 2019 08:22:40 +0000 Subject: [PATCH 
032/254] Do not install lit-cpuid Summary: AFAIK, lit-cpuid is used by the tests. Installing it causes LLVMExports*.cmake files to depend on this program. It causes some serious packaging issues as it would means that llvm-dev depends on lldb. See: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=941082 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=941306 See also https://bugs.llvm.org/show_bug.cgi?id=43035 for a similar issue caused by https://reviews.llvm.org/D56606 Reviewers: mgorny Reviewed By: mgorny Subscribers: delcypher, lldb-commits Differential Revision: https://reviews.llvm.org/D68537 llvm-svn: 373819 --- lldb/utils/lit-cpuid/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/utils/lit-cpuid/CMakeLists.txt b/lldb/utils/lit-cpuid/CMakeLists.txt index bc9d31309e56a..f365447dd27df 100644 --- a/lldb/utils/lit-cpuid/CMakeLists.txt +++ b/lldb/utils/lit-cpuid/CMakeLists.txt @@ -1,4 +1,4 @@ -add_llvm_utility(lit-cpuid +add_lldb_executable(lit-cpuid lit-cpuid.cpp ) From b1f0183e572b32c0118c2e23910bf0c3dc37baad Mon Sep 17 00:00:00 2001 From: James Molloy Date: Sat, 5 Oct 2019 08:57:17 +0000 Subject: [PATCH 033/254] [UnitTests] Try and pacify gcc-5 This looks like a defect in gcc-5 where it chooses a constexpr constructor from the initializer-list that it considers to be explicit. I've tried to reproduce but I can't install anything prior to gcc-6 easily on my system, and that doesn't have the error. So this is speculative pacification. Reported by Steven Wan. 
llvm-svn: 373820 --- llvm/unittests/TableGen/AutomataTest.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/unittests/TableGen/AutomataTest.cpp b/llvm/unittests/TableGen/AutomataTest.cpp index 11c03426fd86f..fb19716c48499 100644 --- a/llvm/unittests/TableGen/AutomataTest.cpp +++ b/llvm/unittests/TableGen/AutomataTest.cpp @@ -62,16 +62,16 @@ TEST(Automata, TupleAutomatonAccepts) { Automaton A(makeArrayRef(TupleAutomatonTransitions)); A.reset(); EXPECT_TRUE( - A.add({SK_a, SK_b, "yeet"})); + A.add(TupleAutomatonAction{SK_a, SK_b, "yeet"})); A.reset(); EXPECT_FALSE( - A.add({SK_a, SK_a, "yeet"})); + A.add(TupleAutomatonAction{SK_a, SK_a, "yeet"})); A.reset(); EXPECT_FALSE( - A.add({SK_a, SK_b, "feet"})); + A.add(TupleAutomatonAction{SK_a, SK_b, "feet"})); A.reset(); EXPECT_TRUE( - A.add({SK_b, SK_b, "foo"})); + A.add(TupleAutomatonAction{SK_b, SK_b, "foo"})); } TEST(Automata, NfaAutomatonAccepts) { From 375a84bb75a8d00ad1af4c679b8bc42553ef66e1 Mon Sep 17 00:00:00 2001 From: Paul Hoad Date: Sat, 5 Oct 2019 09:55:23 +0000 Subject: [PATCH 034/254] [clang-format] SpacesInSquareBrackets should affect lambdas with parameters too Summary: This patch makes the `SpacesInSquareBrackets` setting also apply to C++ lambdas with parameters. Looking through the revision history, it appears support for only array brackets was added, and lambda brackets were ignored. Therefore, I am inclined to think it was simply an omission, rather than a deliberate choice. See https://bugs.llvm.org/show_bug.cgi?id=17887 and https://reviews.llvm.org/D4944. 
Reviewers: MyDeveloperDay, reuk, owenpan Reviewed By: MyDeveloperDay Subscribers: cfe-commits Patch by: mitchell-stellar Tags: #clang-format, #clang Differential Revision: https://reviews.llvm.org/D68473 llvm-svn: 373821 --- clang/docs/ClangFormatStyleOptions.rst | 3 ++- clang/lib/Format/TokenAnnotator.cpp | 7 ++++--- clang/unittests/Format/FormatTest.cpp | 7 +++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 24ad1f67f0895..459805d312aca 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2301,7 +2301,8 @@ the configuration (without a prefix: ``Auto``). **SpacesInSquareBrackets** (``bool``) If ``true``, spaces will be inserted after ``[`` and before ``]``. - Lambdas or unspecified size array declarations will not be affected. + Lambdas without arguments or unspecified size array declarations will not be + affected. .. code-block:: c++ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a1073c8d60764..2cc69108b2f9e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2616,8 +2616,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::l_square)) return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && SpaceRequiredForArrayInitializerLSquare(Left, Style)) || - (Left.isOneOf(TT_ArraySubscriptLSquare, - TT_StructuredBindingLSquare) && + (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare, + TT_LambdaLSquare) && Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); if (Right.is(tok::r_square)) return Right.MatchingParen && @@ -2626,7 +2626,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Style)) || (Style.SpacesInSquareBrackets && Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, - TT_StructuredBindingLSquare)) || + 
TT_StructuredBindingLSquare, + TT_LambdaLSquare)) || Right.MatchingParen->is(TT_AttributeParen)); if (Right.is(tok::l_square) && !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d31ec30ade4ac..8263dc41b315f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10515,10 +10515,6 @@ TEST_F(FormatTest, ConfigurableSpacesInSquareBrackets) { FormatStyle Spaces = getLLVMStyle(); Spaces.SpacesInSquareBrackets = true; - // Lambdas unchanged. - verifyFormat("int c = []() -> int { return 2; }();\n", Spaces); - verifyFormat("return [i, args...] {};", Spaces); - // Not lambdas. verifyFormat("int a[ 5 ];", Spaces); verifyFormat("a[ 3 ] += 42;", Spaces); @@ -10529,6 +10525,9 @@ TEST_F(FormatTest, ConfigurableSpacesInSquareBrackets) { verifyFormat("std::unique_ptr foo() {}", Spaces); verifyFormat("int i = a[ a ][ a ]->f();", Spaces); verifyFormat("int i = (*b)[ a ]->f();", Spaces); + // Lambdas. + verifyFormat("int c = []() -> int { return 2; }();\n", Spaces); + verifyFormat("return [ i, args... ] {};", Spaces); } TEST_F(FormatTest, ConfigurableSpaceBeforeAssignmentOperators) { From f609c0a303e4e20356d565d2bd4ecec76ed7ca7e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:20:30 +0000 Subject: [PATCH 035/254] BranchFolding - IsBetterFallthrough - assert non-null pointers. NFCI. Silences static analyzer null dereference warnings. llvm-svn: 373823 --- llvm/lib/CodeGen/BranchFolding.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 6c997a73023bc..b0d1599a5ebcf 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1307,6 +1307,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { /// result in infinite loops. 
static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { + assert(MBB1 && MBB2 && "Unknown MachineBasicBlock"); + // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block From 20692a0d3d3e837e6f81d477cbc07b4eb449d380 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:20:42 +0000 Subject: [PATCH 036/254] SemaStmt - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but we should be able to use castAs<> directly and if not assert will fire for us. llvm-svn: 373824 --- clang/lib/Sema/SemaStmt.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index bfdc550e75514..841801245c541 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3305,18 +3305,18 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { } assert(!FnRetType.isNull()); - if (BlockScopeInfo *CurBlock = dyn_cast(CurCap)) { - if (CurBlock->FunctionType->getAs()->getNoReturnAttr()) { + if (auto *CurBlock = dyn_cast(CurCap)) { + if (CurBlock->FunctionType->castAs()->getNoReturnAttr()) { Diag(ReturnLoc, diag::err_noreturn_block_has_return_expr); return StmtError(); } - } else if (CapturedRegionScopeInfo *CurRegion = - dyn_cast(CurCap)) { + } else if (auto *CurRegion = dyn_cast(CurCap)) { Diag(ReturnLoc, diag::err_return_in_captured_stmt) << CurRegion->getRegionName(); return StmtError(); } else { assert(CurLambda && "unknown kind of captured scope"); - if (CurLambda->CallOperator->getType()->getAs() + if (CurLambda->CallOperator->getType() + ->castAs() ->getNoReturnAttr()) { Diag(ReturnLoc, diag::err_noreturn_lambda_has_return_expr); return StmtError(); From 
0e82722f9a10f8ad3cde686bbd9726b08962a024 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:20:51 +0000 Subject: [PATCH 037/254] Remove redundant !HasDependentValue check. NFCI. Fixes cppcheck warning. llvm-svn: 373825 --- clang/lib/Sema/SemaStmt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 841801245c541..cff20aab03081 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -963,7 +963,7 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, // condition is constant. llvm::APSInt ConstantCondValue; bool HasConstantCond = false; - if (!HasDependentValue && !TheDefaultStmt) { + if (!TheDefaultStmt) { Expr::EvalResult Result; HasConstantCond = CondExpr->EvaluateAsInt(Result, Context, Expr::SE_AllowSideEffects); From 22b6873195618fbbe90e30a2921a2984ac95ed20 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:20:59 +0000 Subject: [PATCH 038/254] TreeTransform - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but we should be able to use castAs<> directly and if not assert will fire for us. llvm-svn: 373826 --- clang/lib/Sema/TreeTransform.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 5ccd8cc889ac7..f62c0d891a458 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4639,7 +4639,7 @@ TreeTransform::TransformReferenceType(TypeLocBuilder &TLB, // Objective-C ARC can add lifetime qualifiers to the type that we're // referring to. TLB.TypeWasModifiedSafely( - Result->getAs()->getPointeeTypeAsWritten()); + Result->castAs()->getPointeeTypeAsWritten()); // r-value references can be rebuilt as l-value references. 
ReferenceTypeLoc NewTL; @@ -5931,7 +5931,7 @@ QualType TreeTransform::TransformPipeType(TypeLocBuilder &TLB, QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || ValueType != TL.getValueLoc().getType()) { - const PipeType *PT = Result->getAs(); + const PipeType *PT = Result->castAs(); bool isReadPipe = PT->isReadOnly(); Result = getDerived().RebuildPipeType(ValueType, TL.getKWLoc(), isReadPipe); if (Result.isNull()) @@ -11675,7 +11675,7 @@ TreeTransform::TransformCXXDependentScopeMemberExpr( } else { OldBase = nullptr; BaseType = getDerived().TransformType(E->getBaseType()); - ObjectType = BaseType->getAs()->getPointeeType(); + ObjectType = BaseType->castAs()->getPointeeType(); } // Transform the first part of the nested-name-specifier that qualifies @@ -13195,7 +13195,7 @@ TreeTransform::RebuildCXXPseudoDestructorExpr(Expr *Base, if (Base->isTypeDependent() || Destroyed.getIdentifier() || (!isArrow && !BaseType->getAs()) || (isArrow && BaseType->getAs() && - !BaseType->getAs()->getPointeeType() + !BaseType->castAs()->getPointeeType() ->template getAs())){ // This pseudo-destructor expression is still a pseudo-destructor. return SemaRef.BuildPseudoDestructorExpr( From f4cc3b3e10edd5c8d9b23f1d6806829d20b2c9df Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:21:08 +0000 Subject: [PATCH 039/254] SemaTemplate - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but we should be able to use castAs<> directly and if not assert will fire for us. 
llvm-svn: 373827 --- clang/lib/Sema/SemaTemplate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 847a19baed14a..62dc17254c710 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5872,7 +5872,7 @@ static bool CheckTemplateArgumentIsCompatibleWithParameter( Expr *Arg, QualType ArgType) { bool ObjCLifetimeConversion; if (ParamType->isPointerType() && - !ParamType->getAs()->getPointeeType()->isFunctionType() && + !ParamType->castAs()->getPointeeType()->isFunctionType() && S.IsQualificationConversion(ArgType, ParamType, false, ObjCLifetimeConversion)) { // For pointer-to-object types, qualification conversions are @@ -6748,20 +6748,20 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // overloaded functions (or a pointer to such), the matching // function is selected from the set (13.4). (ParamType->isPointerType() && - ParamType->getAs()->getPointeeType()->isFunctionType()) || + ParamType->castAs()->getPointeeType()->isFunctionType()) || // -- For a non-type template-parameter of type reference to // function, no conversions apply. If the template-argument // represents a set of overloaded functions, the matching // function is selected from the set (13.4). (ParamType->isReferenceType() && - ParamType->getAs()->getPointeeType()->isFunctionType()) || + ParamType->castAs()->getPointeeType()->isFunctionType()) || // -- For a non-type template-parameter of type pointer to // member function, no conversions apply. If the // template-argument represents a set of overloaded member // functions, the matching member function is selected from // the set (13.4). 
(ParamType->isMemberPointerType() && - ParamType->getAs()->getPointeeType() + ParamType->castAs()->getPointeeType() ->isFunctionType())) { if (Arg->getType() == Context.OverloadTy) { From 83b81c1f6ee878ffe458aee3162ce0ef004d3374 Mon Sep 17 00:00:00 2001 From: David Bolvansky Date: Sat, 5 Oct 2019 13:28:15 +0000 Subject: [PATCH 040/254] [Diagnostics] Highlight expr's source range for -Wbool-operation Warning message looks better; and GCC adds it too. llvm-svn: 373828 --- clang/lib/Sema/SemaExpr.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index c18f54cefde69..3cb999dacc40b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13481,6 +13481,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, << resultType << Input.get()->getSourceRange(); else if (Input.get()->isKnownToHaveBooleanValue()) Diag(OpLoc, diag::warn_bitwise_negation_bool) + << Input.get()->getSourceRange() << FixItHint::CreateReplacement(OpLoc, "!"); else if (resultType->hasIntegerRepresentation()) break; From 43bbca922844617610a4921bb646e49528059117 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 13:42:14 +0000 Subject: [PATCH 041/254] RewriteObjC - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but we should be able to use castAs<> directly and if not assert will fire for us. llvm-svn: 373829 --- clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index dd57976df7a77..6a22da178fbc9 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -416,7 +416,7 @@ namespace { /// otherwise. 
bool convertBlockPointerToFunctionPointer(QualType &T) { if (isTopLevelBlockPointerType(T)) { - const BlockPointerType *BPT = T->getAs(); + const auto *BPT = T->castAs(); T = Context->getPointerType(BPT->getPointeeType()); return true; } @@ -1163,6 +1163,7 @@ void RewriteObjC::RewriteObjCMethodDecl(const ObjCInterfaceDecl *IDecl, void RewriteObjC::RewriteImplementationDecl(Decl *OID) { ObjCImplementationDecl *IMD = dyn_cast(OID); ObjCCategoryImplDecl *CID = dyn_cast(OID); + assert((IMD || CID) && "Unknown ImplementationDecl"); InsertText(IMD ? IMD->getBeginLoc() : CID->getBeginLoc(), "// "); @@ -2017,7 +2018,7 @@ RewriteObjC::SynthesizeCallToFunctionDecl(FunctionDecl *FD, ImplicitCastExpr::Create(*Context, pToFunc, CK_FunctionToPointerDecay, DRE, nullptr, VK_RValue); - const FunctionType *FT = msgSendType->getAs(); + const auto *FT = msgSendType->castAs(); CallExpr *Exp = CallExpr::Create( *Context, ICE, Args, FT->getCallResultType(*Context), VK_RValue, EndLoc); @@ -2285,7 +2286,7 @@ void RewriteObjC::RewriteBlockPointerTypeVariable(std::string& Str, void RewriteObjC::RewriteBlockLiteralFunctionDecl(FunctionDecl *FD) { SourceLocation FunLocStart = FD->getTypeSpecStartLoc(); const FunctionType *funcType = FD->getType()->getAs(); - const FunctionProtoType *proto = dyn_cast(funcType); + const FunctionProtoType *proto = dyn_cast_or_null(funcType); if (!proto) return; QualType Type = proto->getReturnType(); @@ -2604,7 +2605,7 @@ CallExpr *RewriteObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavo // Don't forget the parens to enforce the proper binding. 
ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), cast); - const FunctionType *FT = msgSendType->getAs(); + const auto *FT = msgSendType->castAs(); CallExpr *STCE = CallExpr::Create(*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, SourceLocation()); return STCE; @@ -2735,8 +2736,8 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp, case ObjCMessageExpr::Class: { SmallVector ClsExprs; - ObjCInterfaceDecl *Class - = Exp->getClassReceiver()->getAs()->getInterface(); + auto *Class = + Exp->getClassReceiver()->castAs()->getInterface(); IdentifierInfo *clsName = Class->getIdentifier(); ClsExprs.push_back(getStringLiteral(clsName->getName())); CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl, ClsExprs, @@ -2957,7 +2958,7 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp, // Don't forget the parens to enforce the proper binding. ParenExpr *PE = new (Context) ParenExpr(StartLoc, EndLoc, cast); - const FunctionType *FT = msgSendType->getAs(); + const auto *FT = msgSendType->castAs(); CallExpr *CE = CallExpr::Create(*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, EndLoc); Stmt *ReplacingStmt = CE; From 41c934acaf8539dedad4b48bbc88580c74fed25a Mon Sep 17 00:00:00 2001 From: David Bolvansky Date: Sat, 5 Oct 2019 14:29:25 +0000 Subject: [PATCH 042/254] [SelectionDAG] Add tests for LKK algorithm Added some tests testing urem and srem operations with a constant divisor. 
Patch by TG908 (Tim Gymnich) Differential Revision: https://reviews.llvm.org/D68421 llvm-svn: 373830 --- llvm/test/CodeGen/AArch64/srem-lkk.ll | 149 ++ llvm/test/CodeGen/AArch64/srem-vector-lkk.ll | 324 ++++ llvm/test/CodeGen/AArch64/urem-lkk.ll | 103 ++ llvm/test/CodeGen/AArch64/urem-vector-lkk.ll | 267 +++ llvm/test/CodeGen/PowerPC/srem-lkk.ll | 149 ++ llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll | 1675 +++++++++++++++++ llvm/test/CodeGen/PowerPC/urem-lkk.ll | 106 ++ llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll | 1338 ++++++++++++++ llvm/test/CodeGen/RISCV/srem-lkk.ll | 583 ++++++ llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 1689 ++++++++++++++++++ llvm/test/CodeGen/RISCV/urem-lkk.ll | 354 ++++ llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 1419 +++++++++++++++ llvm/test/CodeGen/X86/srem-lkk.ll | 159 ++ llvm/test/CodeGen/X86/srem-vector-lkk.ll | 556 ++++++ llvm/test/CodeGen/X86/urem-lkk.ll | 108 ++ llvm/test/CodeGen/X86/urem-vector-lkk.ll | 378 ++++ 16 files changed, 9357 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/srem-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-lkk.ll create mode 100644 llvm/test/CodeGen/AArch64/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/srem-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/urem-lkk.ll create mode 100644 llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/srem-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/urem-lkk.ll create mode 100644 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/X86/srem-lkk.ll create mode 100644 llvm/test/CodeGen/X86/srem-vector-lkk.ll create mode 100644 llvm/test/CodeGen/X86/urem-lkk.ll create mode 100644 llvm/test/CodeGen/X86/urem-vector-lkk.ll diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll 
b/llvm/test/CodeGen/AArch64/srem-lkk.ll new file mode 100644 index 0000000000000..321791e9228fc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @fold_srem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_srem_positive_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: asr w9, w8, #6 +; CHECK-NEXT: add w8, w9, w8, lsr #31 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: msub w0, w8, w9, w0 +; CHECK-NEXT: ret + %1 = srem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_srem_positive_even(i32 %x) { +; CHECK-LABEL: fold_srem_positive_even: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #36849 +; CHECK-NEXT: movk w8, #15827, lsl #16 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 +; CHECK-NEXT: asr x8, x8, #40 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #1060 +; CHECK-NEXT: msub w0, w8, w9, w0 +; CHECK-NEXT: ret + %1 = srem i32 %x, 1060 + ret i32 %1 +} + + +define i32 @fold_srem_negative_odd(i32 %x) { +; CHECK-LABEL: fold_srem_negative_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65445 +; CHECK-NEXT: movk w8, #42330, lsl #16 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 +; CHECK-NEXT: asr x8, x8, #40 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #-723 +; CHECK-NEXT: msub w0, w8, w9, w0 +; CHECK-NEXT: ret + %1 = srem i32 %x, -723 + ret i32 %1 +} + + +define i32 @fold_srem_negative_even(i32 %x) { +; CHECK-LABEL: fold_srem_negative_even: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #62439 +; CHECK-NEXT: movk w8, #64805, lsl #16 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 +; CHECK-NEXT: asr x8, x8, #40 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #-22981 +; CHECK-NEXT: msub w0, 
w8, w9, w0 +; CHECK-NEXT: ret + %1 = srem i32 %x, -22981 + ret i32 %1 +} + + +; Don't fold if we can combine srem with sdiv. +define i32 @combine_srem_sdiv(i32 %x) { +; CHECK-LABEL: combine_srem_sdiv: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: asr w9, w8, #6 +; CHECK-NEXT: add w8, w9, w8, lsr #31 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: msub w9, w8, w9, w0 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret + %1 = srem i32 %x, 95 + %2 = sdiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_srem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_srem_power_of_two: +; CHECK: // %bb.0: +; CHECK-NEXT: add w8, w0, #63 // =63 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: and w8, w8, #0xffffffc0 +; CHECK-NEXT: sub w0, w0, w8 +; CHECK-NEXT: ret + %1 = srem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_srem_one(i32 %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %1 = srem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^31. 
+define i32 @dont_fold_srem_i32_smax(i32 %x) { +; CHECK-LABEL: dont_fold_srem_i32_smax: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w8, w0, lt +; CHECK-NEXT: and w8, w8, #0x80000000 +; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: ret + %1 = srem i32 %x, 2147483648 + ret i32 %1 +} + +; Don't fold i64 srem +define i64 @dont_fold_srem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_srem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #58849 +; CHECK-NEXT: movk x8, #48148, lsl #16 +; CHECK-NEXT: movk x8, #33436, lsl #32 +; CHECK-NEXT: movk x8, #21399, lsl #48 +; CHECK-NEXT: smulh x8, x0, x8 +; CHECK-NEXT: asr x9, x8, #5 +; CHECK-NEXT: add x8, x9, x8, lsr #63 +; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: msub x0, x8, x9, x0 +; CHECK-NEXT: ret + %1 = srem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll new file mode 100644 index 0000000000000..5597e16576ccc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -0,0 +1,324 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { +; CHECK-LABEL: fold_srem_vec_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #63421 +; CHECK-NEXT: mov w12, #33437 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w8, v0.h[1] +; CHECK-NEXT: movk w9, #31710, lsl #16 +; CHECK-NEXT: smov w11, v0.h[2] +; CHECK-NEXT: movk w12, #21399, lsl #16 +; CHECK-NEXT: smull x12, w11, w12 +; CHECK-NEXT: smull x9, w8, w9 +; CHECK-NEXT: lsr x13, x12, #63 +; CHECK-NEXT: asr x12, x12, #37 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w12, w12, w13 +; CHECK-NEXT: mov w13, #98 +; CHECK-NEXT: sub w9, w9, w8 +; CHECK-NEXT: msub w11, w12, w13, w11 +; CHECK-NEXT: asr w13, w9, #6 +; CHECK-NEXT: add w9, w13, w9, lsr #31 +; 
CHECK-NEXT: mov w13, #37253 +; CHECK-NEXT: mov w10, #-124 +; CHECK-NEXT: smov w12, v0.h[0] +; CHECK-NEXT: movk w13, #44150, lsl #16 +; CHECK-NEXT: msub w8, w9, w10, w8 +; CHECK-NEXT: smull x10, w12, w13 +; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: add w10, w10, w12 +; CHECK-NEXT: asr w13, w10, #6 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: add w10, w13, w10, lsr #31 +; CHECK-NEXT: msub w9, w10, w9, w12 +; CHECK-NEXT: mov w10, #63249 +; CHECK-NEXT: smov w13, v0.h[3] +; CHECK-NEXT: movk w10, #48808, lsl #16 +; CHECK-NEXT: smull x10, w13, w10 +; CHECK-NEXT: lsr x12, x10, #63 +; CHECK-NEXT: asr x10, x10, #40 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: add w10, w10, w12 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov w8, #-1003 +; CHECK-NEXT: mov v0.h[2], w11 +; CHECK-NEXT: msub w8, w10, w8, w13 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { +; CHECK-LABEL: fold_srem_vec_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #37253 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w8, v0.h[1] +; CHECK-NEXT: movk w9, #44150, lsl #16 +; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smull x13, w8, w9 +; CHECK-NEXT: smov w11, v0.h[2] +; CHECK-NEXT: smull x14, w10, w9 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: smull x15, w11, w9 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: add w13, w13, w8 +; CHECK-NEXT: smull x9, w12, w9 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w14, w14, w10 +; CHECK-NEXT: asr w16, w13, #6 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w15, w15, w11 +; CHECK-NEXT: add w13, w16, w13, lsr #31 +; CHECK-NEXT: asr w16, w14, #6 +; CHECK-NEXT: add w9, w9, w12 +; CHECK-NEXT: add w14, w16, w14, lsr #31 +; CHECK-NEXT: asr w16, w15, #6 +; CHECK-NEXT: add w15, w16, w15, lsr #31 +; CHECK-NEXT: asr w16, w9, #6 +; CHECK-NEXT: add w9, w16, w9, lsr #31 
+; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: msub w10, w14, w16, w10 +; CHECK-NEXT: msub w8, w13, w16, w8 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: msub w11, w15, w16, w11 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov v0.h[2], w11 +; CHECK-NEXT: msub w8, w9, w16, w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine srem with sdiv. +define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { +; CHECK-LABEL: combine_srem_sdiv: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smull x13, w9, w8 +; CHECK-NEXT: smov w11, v0.h[2] +; CHECK-NEXT: smull x14, w10, w8 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: smull x15, w11, w8 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: add w13, w13, w9 +; CHECK-NEXT: smull x8, w12, w8 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w14, w14, w10 +; CHECK-NEXT: asr w16, w13, #6 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w15, w15, w11 +; CHECK-NEXT: add w13, w16, w13, lsr #31 +; CHECK-NEXT: asr w16, w14, #6 +; CHECK-NEXT: add w8, w8, w12 +; CHECK-NEXT: add w14, w16, w14, lsr #31 +; CHECK-NEXT: asr w16, w15, #6 +; CHECK-NEXT: add w15, w16, w15, lsr #31 +; CHECK-NEXT: asr w16, w8, #6 +; CHECK-NEXT: add w8, w16, w8, lsr #31 +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: msub w10, w14, w16, w10 +; CHECK-NEXT: msub w9, w13, w16, w9 +; CHECK-NEXT: fmov s0, w14 +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: msub w11, w15, w16, w11 +; CHECK-NEXT: mov v0.h[1], w13 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: msub w12, w8, w16, w12 +; CHECK-NEXT: mov v0.h[2], w15 +; CHECK-NEXT: mov v1.h[2], w11 +; CHECK-NEXT: mov v1.h[3], w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: add v0.4h, v1.4h, v0.4h +; 
CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + %2 = sdiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. +define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_srem_power_of_two: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w8, v0.h[1] +; CHECK-NEXT: add w12, w8, #31 // =31 +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: mov w11, #37253 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: smov w9, v0.h[0] +; CHECK-NEXT: smov w10, v0.h[3] +; CHECK-NEXT: movk w11, #44150, lsl #16 +; CHECK-NEXT: and w12, w12, #0xffffffe0 +; CHECK-NEXT: sub w8, w8, w12 +; CHECK-NEXT: add w12, w9, #63 // =63 +; CHECK-NEXT: smull x11, w10, w11 +; CHECK-NEXT: cmp w9, #0 // =0 +; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: add w11, w11, w10 +; CHECK-NEXT: and w12, w12, #0xffffffc0 +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: asr w12, w11, #6 +; CHECK-NEXT: add w11, w12, w11, lsr #31 +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: add w9, w12, #7 // =7 +; CHECK-NEXT: cmp w12, #0 // =0 +; CHECK-NEXT: csel w9, w9, w12, lt +; CHECK-NEXT: and w9, w9, #0xfffffff8 +; CHECK-NEXT: sub w9, w12, w9 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov w8, #95 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w11, w8, w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. 
+define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #17097 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w8, v0.h[2] +; CHECK-NEXT: movk w9, #45590, lsl #16 +; CHECK-NEXT: smull x9, w8, w9 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w9, w9, w8 +; CHECK-NEXT: asr w12, w9, #4 +; CHECK-NEXT: add w9, w12, w9, lsr #31 +; CHECK-NEXT: mov w12, #30865 +; CHECK-NEXT: mov w10, #23 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: movk w12, #51306, lsl #16 +; CHECK-NEXT: msub w8, w9, w10, w8 +; CHECK-NEXT: smull x10, w11, w12 +; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: asr w12, w10, #9 +; CHECK-NEXT: mov w9, #654 +; CHECK-NEXT: add w10, w12, w10, lsr #31 +; CHECK-NEXT: msub w9, w10, w9, w11 +; CHECK-NEXT: mov w10, #47143 +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: movk w10, #24749, lsl #16 +; CHECK-NEXT: smull x10, w12, w10 +; CHECK-NEXT: lsr x11, x10, #63 +; CHECK-NEXT: asr x10, x10, #43 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov w9, #5423 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: msub w8, w10, w9, w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^15. 
+define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_srem_i16_smax: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w10, #17097 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w10, #45590, lsl #16 +; CHECK-NEXT: smull x10, w9, w10 +; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: add w10, w10, w9 +; CHECK-NEXT: asr w12, w10, #4 +; CHECK-NEXT: mov w11, #23 +; CHECK-NEXT: add w10, w12, w10, lsr #31 +; CHECK-NEXT: msub w9, w10, w11, w9 +; CHECK-NEXT: mov w10, #47143 +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: movk w10, #24749, lsl #16 +; CHECK-NEXT: smull x10, w12, w10 +; CHECK-NEXT: lsr x11, x10, #63 +; CHECK-NEXT: asr x10, x10, #43 +; CHECK-NEXT: smov w8, v0.h[1] +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: mov w11, #32767 +; CHECK-NEXT: add w11, w8, w11 +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: and w11, w11, #0xffff8000 +; CHECK-NEXT: sub w8, w8, w11 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov w8, #5423 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w10, w8, w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 srem. 
+define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { +; CHECK-LABEL: dont_fold_srem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x9, #6055 +; CHECK-NEXT: movk x9, #58853, lsl #16 +; CHECK-NEXT: movk x9, #47142, lsl #32 +; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: movk x9, #24749, lsl #48 +; CHECK-NEXT: smulh x9, x8, x9 +; CHECK-NEXT: asr x12, x9, #11 +; CHECK-NEXT: mov w10, #5423 +; CHECK-NEXT: add x9, x12, x9, lsr #63 +; CHECK-NEXT: msub x8, x9, x10, x8 +; CHECK-NEXT: mov x9, #21445 +; CHECK-NEXT: movk x9, #1603, lsl #16 +; CHECK-NEXT: movk x9, #15432, lsl #32 +; CHECK-NEXT: mov x12, v0.d[1] +; CHECK-NEXT: movk x9, #25653, lsl #48 +; CHECK-NEXT: smulh x9, x12, x9 +; CHECK-NEXT: asr x10, x9, #8 +; CHECK-NEXT: add x9, x10, x9, lsr #63 +; CHECK-NEXT: mov w10, #654 +; CHECK-NEXT: msub x9, x9, x10, x12 +; CHECK-NEXT: mov x10, #8549 +; CHECK-NEXT: movk x10, #22795, lsl #16 +; CHECK-NEXT: movk x10, #17096, lsl #32 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: movk x10, #45590, lsl #48 +; CHECK-NEXT: smulh x10, x11, x10 +; CHECK-NEXT: add x10, x10, x11 +; CHECK-NEXT: asr x12, x10, #4 +; CHECK-NEXT: add x10, x12, x10, lsr #63 +; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: msub x10, x10, x12, x11 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %1 = srem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/AArch64/urem-lkk.ll b/llvm/test/CodeGen/AArch64/urem-lkk.ll new file mode 100644 index 0000000000000..3d7f309ddaf31 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/urem-lkk.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @fold_urem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_urem_positive_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr 
x8, x8, #32 +; CHECK-NEXT: sub w9, w0, w8 +; CHECK-NEXT: add w8, w8, w9, lsr #1 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: msub w0, w8, w9, w0 +; CHECK-NEXT: ret + %1 = urem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_urem_positive_even(i32 %x) { +; CHECK-LABEL: fold_urem_positive_even: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #16323 +; CHECK-NEXT: movk w8, #63310, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #42 +; CHECK-NEXT: mov w9, #1060 +; CHECK-NEXT: msub w0, w8, w9, w0 +; CHECK-NEXT: ret + %1 = urem i32 %x, 1060 + ret i32 %1 +} + + +; Don't fold if we can combine urem with udiv. +define i32 @combine_urem_udiv(i32 %x) { +; CHECK-LABEL: combine_urem_udiv: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umull x8, w0, w8 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: sub w9, w0, w8 +; CHECK-NEXT: add w8, w8, w9, lsr #1 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: msub w9, w8, w9, w0 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret + %1 = urem i32 %x, 95 + %2 = udiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_urem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_urem_power_of_two: +; CHECK: // %bb.0: +; CHECK-NEXT: and w0, w0, #0x3f +; CHECK-NEXT: ret + %1 = urem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_urem_one(i32 %x) { +; CHECK-LABEL: dont_fold_urem_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %1 = urem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^32. 
+define i32 @dont_fold_urem_i32_umax(i32 %x) { +; CHECK-LABEL: dont_fold_urem_i32_umax: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %1 = urem i32 %x, 4294967296 + ret i32 %1 +} + +; Don't fold i64 urem +define i64 @dont_fold_urem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_urem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x9, #58849 +; CHECK-NEXT: movk x9, #48148, lsl #16 +; CHECK-NEXT: movk x9, #33436, lsl #32 +; CHECK-NEXT: lsr x8, x0, #1 +; CHECK-NEXT: movk x9, #21399, lsl #48 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: lsr x8, x8, #4 +; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: msub x0, x8, x9, x0 +; CHECK-NEXT: ret + %1 = urem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll new file mode 100644 index 0000000000000..c5951a4993fc3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -0,0 +1,267 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { +; CHECK-LABEL: fold_urem_vec_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w11, #33437 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w10, v0.h[2] +; CHECK-NEXT: movk w11, #21399, lsl #16 +; CHECK-NEXT: umull x11, w10, w11 +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: mov w9, #16913 +; CHECK-NEXT: mov w12, #98 +; CHECK-NEXT: lsr x11, x11, #37 +; CHECK-NEXT: movk w9, #8456, lsl #16 +; CHECK-NEXT: msub w10, w11, w12, w10 +; CHECK-NEXT: ubfx w12, w8, #2, #14 +; CHECK-NEXT: umull x9, w12, w9 +; CHECK-NEXT: mov w11, #124 +; CHECK-NEXT: lsr x9, x9, #34 +; CHECK-NEXT: msub w8, w9, w11, w8 +; CHECK-NEXT: mov w9, #8969 +; CHECK-NEXT: umov w12, v0.h[0] +; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umull x9, w12, w9 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: sub w11, w12, w9 +; CHECK-NEXT: add w9, w9, w11, lsr #1 +; CHECK-NEXT: mov w11, #95 +; CHECK-NEXT: lsr 
w9, w9, #6 +; CHECK-NEXT: msub w9, w9, w11, w12 +; CHECK-NEXT: umov w11, v0.h[3] +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov w9, #2287 +; CHECK-NEXT: movk w9, #16727, lsl #16 +; CHECK-NEXT: umull x9, w11, w9 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov w8, #1003 +; CHECK-NEXT: lsr x9, x9, #40 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: msub w8, w9, w8, w11 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { +; CHECK-LABEL: fold_urem_vec_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umull x13, w8, w9 +; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: umull x14, w10, w9 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: umull x15, w11, w9 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: sub w16, w8, w13 +; CHECK-NEXT: umull x9, w12, w9 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w13, w13, w16, lsr #1 +; CHECK-NEXT: sub w16, w10, w14 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w14, w14, w16, lsr #1 +; CHECK-NEXT: sub w16, w11, w15 +; CHECK-NEXT: add w15, w15, w16, lsr #1 +; CHECK-NEXT: sub w16, w12, w9 +; CHECK-NEXT: add w9, w9, w16, lsr #1 +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: lsr w13, w13, #6 +; CHECK-NEXT: msub w8, w13, w16, w8 +; CHECK-NEXT: lsr w13, w14, #6 +; CHECK-NEXT: msub w10, w13, w16, w10 +; CHECK-NEXT: lsr w13, w15, #6 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: msub w11, w13, w16, w11 +; CHECK-NEXT: lsr w9, w9, #6 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov v0.h[2], w11 +; CHECK-NEXT: msub w8, w9, w16, w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't 
fold if we can combine urem with udiv. +define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { +; CHECK-LABEL: combine_urem_udiv: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umull x13, w9, w8 +; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: umull x14, w10, w8 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: umull x15, w11, w8 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: sub w16, w9, w13 +; CHECK-NEXT: umull x8, w12, w8 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w13, w13, w16, lsr #1 +; CHECK-NEXT: sub w16, w10, w14 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w14, w14, w16, lsr #1 +; CHECK-NEXT: sub w16, w11, w15 +; CHECK-NEXT: add w15, w15, w16, lsr #1 +; CHECK-NEXT: sub w16, w12, w8 +; CHECK-NEXT: add w8, w8, w16, lsr #1 +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: lsr w14, w14, #6 +; CHECK-NEXT: lsr w13, w13, #6 +; CHECK-NEXT: msub w10, w14, w16, w10 +; CHECK-NEXT: lsr w15, w15, #6 +; CHECK-NEXT: msub w9, w13, w16, w9 +; CHECK-NEXT: fmov s0, w14 +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: msub w11, w15, w16, w11 +; CHECK-NEXT: mov v0.h[1], w13 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: msub w12, w8, w16, w12 +; CHECK-NEXT: mov v0.h[2], w15 +; CHECK-NEXT: mov v1.h[2], w11 +; CHECK-NEXT: mov v1.h[3], w12 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: add v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + %2 = udiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_urem_power_of_two: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umull x9, w8, w9 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: sub w10, w8, w9 +; CHECK-NEXT: add w9, w9, w10, lsr #1 +; CHECK-NEXT: mov w10, #95 +; CHECK-NEXT: lsr w9, w9, #6 +; CHECK-NEXT: msub w8, w9, w10, w8 +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: and w9, w9, #0x3f +; CHECK-NEXT: umov w10, v0.h[1] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: umov w9, v0.h[2] +; CHECK-NEXT: and w10, w10, #0x1f +; CHECK-NEXT: and w9, w9, #0x7 +; CHECK-NEXT: mov v1.h[1], w10 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. +define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #17097 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: movk w9, #45590, lsl #16 +; CHECK-NEXT: umull x9, w8, w9 +; CHECK-NEXT: mov w10, #23 +; CHECK-NEXT: lsr x9, x9, #36 +; CHECK-NEXT: umov w11, v0.h[1] +; CHECK-NEXT: msub w8, w9, w10, w8 +; CHECK-NEXT: mov w9, #30865 +; CHECK-NEXT: movk w9, #51306, lsl #16 +; CHECK-NEXT: ubfx w10, w11, #1, #15 +; CHECK-NEXT: umull x9, w10, w9 +; CHECK-NEXT: mov w10, #654 +; CHECK-NEXT: lsr x9, x9, #40 +; CHECK-NEXT: msub w9, w9, w10, w11 +; CHECK-NEXT: mov w11, #47143 +; CHECK-NEXT: umov w10, v0.h[3] +; CHECK-NEXT: movk w11, #24749, lsl #16 +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: umull x11, w10, w11 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: mov w9, #5423 +; CHECK-NEXT: lsr x11, x11, #43 +; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: msub w8, w11, w9, w10 +; CHECK-NEXT: mov v1.h[3], w8 
+; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^16. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_urem_i16_smax: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 urem. +define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { +; CHECK-LABEL: dont_fold_urem_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x10, #12109 +; CHECK-NEXT: movk x10, #52170, lsl #16 +; CHECK-NEXT: movk x10, #28749, lsl #32 +; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: movk x10, #49499, lsl #48 +; CHECK-NEXT: umulh x10, x8, x10 +; CHECK-NEXT: mov w11, #5423 +; CHECK-NEXT: lsr x10, x10, #12 +; CHECK-NEXT: msub x8, x10, x11, x8 +; CHECK-NEXT: mov x10, #21445 +; CHECK-NEXT: movk x10, #1603, lsl #16 +; CHECK-NEXT: mov x12, v0.d[1] +; CHECK-NEXT: movk x10, #15432, lsl #32 +; CHECK-NEXT: movk x10, #25653, lsl #48 +; CHECK-NEXT: lsr x11, x12, #1 +; CHECK-NEXT: umulh x10, x11, x10 +; CHECK-NEXT: mov w11, #654 +; CHECK-NEXT: lsr x10, x10, #7 +; CHECK-NEXT: msub x10, x10, x11, x12 +; CHECK-NEXT: mov x11, #17097 +; CHECK-NEXT: movk x11, #45590, lsl #16 +; CHECK-NEXT: movk x11, #34192, lsl #32 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: movk x11, #25644, lsl #48 +; CHECK-NEXT: umulh x11, x9, x11 +; CHECK-NEXT: sub x12, x9, x11 +; CHECK-NEXT: add x11, x11, x12, lsr #1 +; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: lsr x11, x11, #4 +; CHECK-NEXT: msub x9, x11, x12, x9 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: ret + %1 = urem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/PowerPC/srem-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-lkk.ll new file mode 100644 index 0000000000000..ccd1b612235af --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/srem-lkk.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc64 < %s | FileCheck -check-prefixes=CHECK,CHECK64 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s | FileCheck -check-prefixes=CHECK,CHECK32 %s + +define i32 @fold_srem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_srem_positive_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -21386 +; CHECK-NEXT: ori 4, 4, 37253 +; CHECK-NEXT: mulhw 4, 3, 4 +; CHECK-NEXT: add 4, 4, 3 +; CHECK-NEXT: srwi 5, 4, 31 +; CHECK-NEXT: srawi 4, 4, 6 +; CHECK-NEXT: add 4, 4, 5 +; CHECK-NEXT: mulli 4, 4, 95 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = srem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_srem_positive_even(i32 %x) { +; CHECK-LABEL: fold_srem_positive_even: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 15827 +; CHECK-NEXT: ori 4, 4, 36849 +; CHECK-NEXT: mulhw 4, 3, 4 +; CHECK-NEXT: srwi 5, 4, 31 +; CHECK-NEXT: srawi 4, 4, 8 +; CHECK-NEXT: add 4, 4, 5 +; CHECK-NEXT: mulli 4, 4, 1060 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = srem i32 %x, 1060 + ret i32 %1 +} + + +define i32 @fold_srem_negative_odd(i32 %x) { +; CHECK-LABEL: fold_srem_negative_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -23206 +; CHECK-NEXT: ori 4, 4, 65445 +; CHECK-NEXT: mulhw 4, 3, 4 +; CHECK-NEXT: srwi 5, 4, 31 +; CHECK-NEXT: srawi 4, 4, 8 +; CHECK-NEXT: add 4, 4, 5 +; CHECK-NEXT: mulli 4, 4, -723 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = srem i32 %x, -723 + ret i32 %1 +} + + +define i32 @fold_srem_negative_even(i32 %x) { +; CHECK-LABEL: fold_srem_negative_even: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -731 +; CHECK-NEXT: ori 4, 4, 62439 +; CHECK-NEXT: mulhw 4, 3, 4 +; CHECK-NEXT: srwi 5, 4, 31 +; CHECK-NEXT: srawi 4, 4, 8 +; CHECK-NEXT: add 4, 4, 5 +; CHECK-NEXT: mulli 4, 4, -22981 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = srem i32 %x, -22981 + ret i32 %1 +} + + +; Don't fold if we can combine srem with sdiv. 
+define i32 @combine_srem_sdiv(i32 %x) { +; CHECK-LABEL: combine_srem_sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -21386 +; CHECK-NEXT: ori 4, 4, 37253 +; CHECK-NEXT: mulhw 4, 3, 4 +; CHECK-NEXT: add 4, 4, 3 +; CHECK-NEXT: srwi 5, 4, 31 +; CHECK-NEXT: srawi 4, 4, 6 +; CHECK-NEXT: add 4, 4, 5 +; CHECK-NEXT: mulli 5, 4, 95 +; CHECK-NEXT: subf 3, 5, 3 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr + %1 = srem i32 %x, 95 + %2 = sdiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_srem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_srem_power_of_two: +; CHECK: # %bb.0: +; CHECK-NEXT: srawi 4, 3, 6 +; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: slwi 4, 4, 6 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = srem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_srem_one(i32 %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + %1 = srem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^31. 
+define i32 @dont_fold_srem_i32_smax(i32 %x) { +; CHECK-LABEL: dont_fold_srem_i32_smax: +; CHECK: # %bb.0: +; CHECK-NEXT: srawi 4, 3, 31 +; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: slwi 4, 4, 31 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr + %1 = srem i32 %x, 2147483648 + ret i32 %1 +} + +; Don't fold i64 srem +define i64 @dont_fold_srem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_srem_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 6, 98 +; CHECK-NEXT: bl __moddi3@PLT +; CHECK-NEXT: lwz 0, 20(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %1 = srem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll new file mode 100644 index 0000000000000..d795f6b62fab3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -0,0 +1,1675 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE + +define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { +; P9LE-LABEL: fold_srem_vec_1: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; 
P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 31710 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 63421 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r4, r5 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 21399 +; P9LE-NEXT: mulli r4, r4, -124 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 33437 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: srawi r4, r4, 5 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, -16728 +; P9LE-NEXT: mulli r4, r4, 98 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 63249 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: srawi r4, r4, 8 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, -1003 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: fold_srem_vec_1: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 2 +; 
P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: lis r4, 31710 +; P9BE-NEXT: ori r4, r4, 63421 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r4, r3, r4 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, -124 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, -21386 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 37253 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, -16728 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 63249 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: srawi r4, r4, 8 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, -1003 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, 21399 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 33437 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: srawi r4, r4, 5 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 98 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: blr +; +; P8LE-LABEL: fold_srem_vec_1: +; P8LE: # 
%bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r4, 21399 +; P8LE-NEXT: lis r9, -16728 +; P8LE-NEXT: lis r11, -21386 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: ori r4, r4, 33437 +; P8LE-NEXT: ori r9, r9, 63249 +; P8LE-NEXT: ori r11, r11, 37253 +; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: rldicl r3, r5, 32, 48 +; P8LE-NEXT: rldicl r6, r5, 16, 48 +; P8LE-NEXT: clrldi r7, r5, 48 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: extsh r10, r6 +; P8LE-NEXT: rldicl r5, r5, 48, 48 +; P8LE-NEXT: extsw r8, r8 +; P8LE-NEXT: extsh r12, r7 +; P8LE-NEXT: extsw r10, r10 +; P8LE-NEXT: mulld r4, r8, r4 +; P8LE-NEXT: lis r8, 31710 +; P8LE-NEXT: extsh r0, r5 +; P8LE-NEXT: extsw r12, r12 +; P8LE-NEXT: mulld r9, r10, r9 +; P8LE-NEXT: ori r8, r8, 63421 +; P8LE-NEXT: extsw r10, r0 +; P8LE-NEXT: mulld r11, r12, r11 +; P8LE-NEXT: mulld r8, r10, r8 +; P8LE-NEXT: rldicl r0, r4, 1, 63 +; P8LE-NEXT: rldicl r4, r4, 32, 32 +; P8LE-NEXT: rldicl r30, r9, 1, 63 +; P8LE-NEXT: rldicl r9, r9, 32, 32 +; P8LE-NEXT: rldicl r11, r11, 32, 32 +; P8LE-NEXT: rldicl r8, r8, 32, 32 +; P8LE-NEXT: add r11, r11, r12 +; P8LE-NEXT: srawi r4, r4, 5 +; P8LE-NEXT: subf r8, r10, r8 +; P8LE-NEXT: srawi r9, r9, 8 +; P8LE-NEXT: srwi r10, r11, 31 +; P8LE-NEXT: add r4, r4, r0 +; P8LE-NEXT: srawi r11, r11, 6 +; P8LE-NEXT: add r9, r9, r30 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: srwi r11, r8, 31 +; P8LE-NEXT: srawi r8, r8, 6 +; P8LE-NEXT: mulli r4, r4, 98 +; P8LE-NEXT: mulli r9, r9, -1003 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: mulli r8, r8, -124 +; P8LE-NEXT: subf r3, r4, r3 +; P8LE-NEXT: subf r4, r9, r6 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r3, r10, r7 +; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: subf r4, r8, r5 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: vmrglh 
v2, v3, v2 +; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: fold_srem_vec_1: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -16728 +; P8BE-NEXT: lis r9, 31710 +; P8BE-NEXT: lis r8, 21399 +; P8BE-NEXT: lis r10, -21386 +; P8BE-NEXT: ori r3, r3, 63249 +; P8BE-NEXT: ori r9, r9, 63421 +; P8BE-NEXT: ori r8, r8, 33437 +; P8BE-NEXT: ori r10, r10, 37253 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: extsw r7, r7 +; P8BE-NEXT: extsw r6, r6 +; P8BE-NEXT: mulld r3, r5, r3 +; P8BE-NEXT: extsw r4, r4 +; P8BE-NEXT: mulld r9, r7, r9 +; P8BE-NEXT: mulld r8, r6, r8 +; P8BE-NEXT: mulld r10, r4, r10 +; P8BE-NEXT: rldicl r11, r3, 1, 63 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: rldicl r9, r9, 32, 32 +; P8BE-NEXT: rldicl r12, r8, 1, 63 +; P8BE-NEXT: rldicl r8, r8, 32, 32 +; P8BE-NEXT: rldicl r10, r10, 32, 32 +; P8BE-NEXT: subf r9, r7, r9 +; P8BE-NEXT: srawi r3, r3, 8 +; P8BE-NEXT: srawi r8, r8, 5 +; P8BE-NEXT: add r10, r10, r4 +; P8BE-NEXT: add r3, r3, r11 +; P8BE-NEXT: srwi r11, r9, 31 +; P8BE-NEXT: add r8, r8, r12 +; P8BE-NEXT: srawi r9, r9, 6 +; P8BE-NEXT: mulli r3, r3, -1003 +; P8BE-NEXT: add r9, r9, r11 +; P8BE-NEXT: srwi r11, r10, 31 +; P8BE-NEXT: srawi r10, r10, 6 +; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: add r10, r10, r11 +; P8BE-NEXT: mulli r9, r9, -124 +; P8BE-NEXT: mulli r10, r10, 95 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: subf r5, r8, r6 +; P8BE-NEXT: mtvsrd v2, r3 +; P8BE-NEXT: subf r6, r9, r7 +; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: subf r4, r10, r4 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: sldi r3, r6, 48 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: vmrghh v2, v3, 
v2 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { +; P9LE-LABEL: fold_srem_vec_2: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r6, r4, r5 +; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r6, r4, r5 +; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r6, r4, r5 +; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; 
P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: fold_srem_vec_2: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: lis r4, -21386 +; P9BE-NEXT: ori r4, r4, 37253 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r5, r3, r4 +; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: add r5, r5, r3 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r5, r3, r4 +; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: add r5, r5, r3 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r5, r3, r4 +; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: add r5, r5, r3 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: 
fold_srem_vec_2: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r4, -21386 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: ori r4, r4, 37253 +; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: clrldi r3, r5, 48 +; P8LE-NEXT: rldicl r7, r5, 32, 48 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: rldicl r6, r5, 48, 48 +; P8LE-NEXT: extsh r10, r7 +; P8LE-NEXT: rldicl r5, r5, 16, 48 +; P8LE-NEXT: extsw r8, r8 +; P8LE-NEXT: extsh r9, r6 +; P8LE-NEXT: extsw r10, r10 +; P8LE-NEXT: extsh r11, r5 +; P8LE-NEXT: mulld r12, r8, r4 +; P8LE-NEXT: extsw r9, r9 +; P8LE-NEXT: extsw r11, r11 +; P8LE-NEXT: mulld r30, r10, r4 +; P8LE-NEXT: mulld r0, r9, r4 +; P8LE-NEXT: mulld r4, r11, r4 +; P8LE-NEXT: rldicl r12, r12, 32, 32 +; P8LE-NEXT: add r8, r12, r8 +; P8LE-NEXT: rldicl r12, r30, 32, 32 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: rldicl r0, r0, 32, 32 +; P8LE-NEXT: rldicl r4, r4, 32, 32 +; P8LE-NEXT: add r10, r12, r10 +; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: srwi r0, r8, 31 +; P8LE-NEXT: add r4, r4, r11 +; P8LE-NEXT: srwi r11, r10, 31 +; P8LE-NEXT: srawi r8, r8, 6 +; P8LE-NEXT: srawi r10, r10, 6 +; P8LE-NEXT: srwi r12, r9, 31 +; P8LE-NEXT: add r8, r8, r0 +; P8LE-NEXT: srawi r9, r9, 6 +; P8LE-NEXT: add r10, r10, r11 +; P8LE-NEXT: srwi r11, r4, 31 +; P8LE-NEXT: srawi r4, r4, 6 +; P8LE-NEXT: add r9, r9, r12 +; P8LE-NEXT: mulli r8, r8, 95 +; P8LE-NEXT: add r4, r4, r11 +; P8LE-NEXT: mulli r9, r9, 95 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: mulli r4, r4, 95 +; P8LE-NEXT: subf r3, r8, r3 +; P8LE-NEXT: subf r6, r9, r6 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r3, r10, r7 +; P8LE-NEXT: subf r4, r4, r5 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: 
fold_srem_vec_2: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -21386 +; P8BE-NEXT: ori r3, r3, 37253 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: extsw r6, r6 +; P8BE-NEXT: mulld r8, r5, r3 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: extsw r7, r7 +; P8BE-NEXT: mulld r9, r6, r3 +; P8BE-NEXT: extsw r4, r4 +; P8BE-NEXT: mulld r10, r7, r3 +; P8BE-NEXT: mulld r3, r4, r3 +; P8BE-NEXT: rldicl r8, r8, 32, 32 +; P8BE-NEXT: rldicl r9, r9, 32, 32 +; P8BE-NEXT: add r8, r8, r5 +; P8BE-NEXT: rldicl r10, r10, 32, 32 +; P8BE-NEXT: add r9, r9, r6 +; P8BE-NEXT: srwi r11, r8, 31 +; P8BE-NEXT: srawi r8, r8, 6 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: add r10, r10, r7 +; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: srwi r11, r9, 31 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: srawi r9, r9, 6 +; P8BE-NEXT: mulli r8, r8, 95 +; P8BE-NEXT: add r9, r9, r11 +; P8BE-NEXT: srwi r11, r10, 31 +; P8BE-NEXT: srawi r10, r10, 6 +; P8BE-NEXT: mulli r9, r9, 95 +; P8BE-NEXT: add r10, r10, r11 +; P8BE-NEXT: srwi r11, r3, 31 +; P8BE-NEXT: srawi r3, r3, 6 +; P8BE-NEXT: mulli r10, r10, 95 +; P8BE-NEXT: subf r5, r8, r5 +; P8BE-NEXT: add r3, r3, r11 +; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: subf r6, r9, r6 +; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: subf r7, r10, r7 +; P8BE-NEXT: mtvsrd v3, r6 +; P8BE-NEXT: subf r3, r3, r4 +; P8BE-NEXT: sldi r4, r7, 48 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: mtvsrd v5, r3 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine srem with sdiv. 
+define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { +; P9LE-LABEL: combine_srem_sdiv: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r6, r4, r5 +; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: add r4, r6, r4 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 +; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: subf r3, r6, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r6, r3 +; P9LE-NEXT: extsw r6, r6 +; P9LE-NEXT: mulld r7, r6, r5 +; P9LE-NEXT: rldicl r7, r7, 32, 32 +; P9LE-NEXT: add r6, r7, r6 +; P9LE-NEXT: srwi r7, r6, 31 +; P9LE-NEXT: srawi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: mulli r7, r6, 95 +; P9LE-NEXT: subf r3, r7, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r7, r3 +; P9LE-NEXT: extsw r7, r7 +; P9LE-NEXT: mulld r8, r7, r5 +; P9LE-NEXT: rldicl r8, r8, 32, 32 +; P9LE-NEXT: add r7, r8, r7 +; P9LE-NEXT: srwi r8, r7, 31 +; P9LE-NEXT: srawi r7, r7, 6 +; P9LE-NEXT: add r7, r7, r8 +; P9LE-NEXT: mulli r8, r7, 95 +; P9LE-NEXT: subf r3, r8, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r8, r3 +; P9LE-NEXT: extsw r8, r8 +; P9LE-NEXT: mulld r5, r8, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r5, r5, r8 +; P9LE-NEXT: srwi r8, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r8 +; P9LE-NEXT: mulli r8, r5, 95 +; P9LE-NEXT: subf r3, r8, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: mtvsrd f0, r4 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: 
mtvsrd f0, r6 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r7 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r5 +; P9LE-NEXT: xxswapd v5, vs0 +; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: vadduhm v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: combine_srem_sdiv: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: lis r5, -21386 +; P9BE-NEXT: ori r5, r5, 37253 +; P9BE-NEXT: extsw r4, r4 +; P9BE-NEXT: mulld r6, r4, r5 +; P9BE-NEXT: rldicl r6, r6, 32, 32 +; P9BE-NEXT: add r4, r6, r4 +; P9BE-NEXT: srwi r6, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r6 +; P9BE-NEXT: mulli r6, r4, 95 +; P9BE-NEXT: subf r3, r6, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r6, r3 +; P9BE-NEXT: extsw r6, r6 +; P9BE-NEXT: mulld r7, r6, r5 +; P9BE-NEXT: rldicl r7, r7, 32, 32 +; P9BE-NEXT: add r6, r7, r6 +; P9BE-NEXT: srwi r7, r6, 31 +; P9BE-NEXT: srawi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: mulli r7, r6, 95 +; P9BE-NEXT: subf r3, r7, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r7, r3 +; P9BE-NEXT: extsw r7, r7 +; P9BE-NEXT: mulld r8, r7, r5 +; P9BE-NEXT: rldicl r8, r8, 32, 32 +; P9BE-NEXT: add r7, r8, r7 +; P9BE-NEXT: srwi r8, r7, 31 +; P9BE-NEXT: srawi r7, r7, 6 +; P9BE-NEXT: add r7, r7, r8 +; P9BE-NEXT: mulli r8, r7, 95 +; P9BE-NEXT: subf r3, r8, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r5, r3, r5 +; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: add r5, r5, r3 +; P9BE-NEXT: srwi r8, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r8 +; 
P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: subf r3, r8, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: sldi r3, r4, 48 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: sldi r3, r6, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: sldi r3, r7, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: sldi r3, r5, 48 +; P9BE-NEXT: mtvsrd v5, r3 +; P9BE-NEXT: vmrghh v4, v5, v4 +; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: vadduhm v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: combine_srem_sdiv: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r5, -21386 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: ori r5, r5, 37253 +; P8LE-NEXT: mfvsrd r6, f0 +; P8LE-NEXT: clrldi r3, r6, 48 +; P8LE-NEXT: rldicl r4, r6, 48, 48 +; P8LE-NEXT: rldicl r7, r6, 32, 48 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: extsh r9, r4 +; P8LE-NEXT: rldicl r6, r6, 16, 48 +; P8LE-NEXT: extsh r10, r7 +; P8LE-NEXT: extsw r8, r8 +; P8LE-NEXT: extsw r9, r9 +; P8LE-NEXT: extsh r11, r6 +; P8LE-NEXT: extsw r10, r10 +; P8LE-NEXT: mulld r12, r8, r5 +; P8LE-NEXT: extsw r11, r11 +; P8LE-NEXT: mulld r0, r9, r5 +; P8LE-NEXT: mulld r30, r10, r5 +; P8LE-NEXT: mulld r5, r11, r5 +; P8LE-NEXT: rldicl r12, r12, 32, 32 +; P8LE-NEXT: rldicl r0, r0, 32, 32 +; P8LE-NEXT: rldicl r30, r30, 32, 32 +; P8LE-NEXT: add r8, r12, r8 +; P8LE-NEXT: rldicl r5, r5, 32, 32 +; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: add r10, r30, r10 +; P8LE-NEXT: srwi r12, r8, 31 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: srawi r8, r8, 6 +; P8LE-NEXT: srawi r0, r9, 6 +; P8LE-NEXT: srwi r9, r9, 31 +; P8LE-NEXT: add r5, r5, r11 +; P8LE-NEXT: add r8, r8, r12 +; P8LE-NEXT: srawi r12, r10, 6 +; P8LE-NEXT: srwi r10, r10, 31 +; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: mulli r0, r8, 95 +; P8LE-NEXT: add r10, r12, r10 +; P8LE-NEXT: mtvsrd f0, r8 +; P8LE-NEXT: srwi r8, r5, 31 +; P8LE-NEXT: srawi r5, r5, 6 +; 
P8LE-NEXT: mulli r11, r9, 95 +; P8LE-NEXT: mtvsrd f1, r9 +; P8LE-NEXT: mulli r9, r10, 95 +; P8LE-NEXT: add r5, r5, r8 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f2, r10 +; P8LE-NEXT: mtvsrd f3, r5 +; P8LE-NEXT: mulli r5, r5, 95 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: subf r3, r0, r3 +; P8LE-NEXT: xxswapd v1, vs2 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r4, r11, r4 +; P8LE-NEXT: xxswapd v6, vs3 +; P8LE-NEXT: subf r3, r9, r7 +; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtvsrd f4, r3 +; P8LE-NEXT: subf r3, r5, r6 +; P8LE-NEXT: mtvsrd f5, r3 +; P8LE-NEXT: xxswapd v4, vs1 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: xxswapd v5, vs4 +; P8LE-NEXT: xxswapd v0, vs5 +; P8LE-NEXT: vmrglh v3, v4, v3 +; P8LE-NEXT: vmrglh v4, v0, v5 +; P8LE-NEXT: vmrglh v5, v6, v1 +; P8LE-NEXT: vmrglw v3, v4, v3 +; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: vadduhm v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: combine_srem_sdiv: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r6, v2 +; P8BE-NEXT: lis r5, -21386 +; P8BE-NEXT: ori r5, r5, 37253 +; P8BE-NEXT: clrldi r3, r6, 48 +; P8BE-NEXT: rldicl r4, r6, 48, 48 +; P8BE-NEXT: extsh r8, r3 +; P8BE-NEXT: rldicl r7, r6, 32, 48 +; P8BE-NEXT: extsh r9, r4 +; P8BE-NEXT: rldicl r6, r6, 16, 48 +; P8BE-NEXT: extsw r8, r8 +; P8BE-NEXT: extsh r10, r7 +; P8BE-NEXT: extsw r9, r9 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: mulld r11, r8, r5 +; P8BE-NEXT: extsw r10, r10 +; P8BE-NEXT: extsw r6, r6 +; P8BE-NEXT: mulld r12, r9, r5 +; P8BE-NEXT: mulld r0, r10, r5 +; P8BE-NEXT: mulld r5, r6, r5 +; P8BE-NEXT: rldicl r11, r11, 32, 32 +; P8BE-NEXT: rldicl r12, r12, 32, 32 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: rldicl r0, r0, 32, 32 +; P8BE-NEXT: rldicl r5, r5, 32, 32 +; P8BE-NEXT: add r9, r12, r9 +; P8BE-NEXT: srawi r11, r8, 6 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: add r10, r0, r10 +; P8BE-NEXT: add r5, r5, r6 +; P8BE-NEXT: srawi r12, r9, 6 +; P8BE-NEXT: srwi r9, r9, 31 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: srawi r0, 
r10, 6 +; P8BE-NEXT: srawi r11, r5, 6 +; P8BE-NEXT: srwi r10, r10, 31 +; P8BE-NEXT: add r9, r12, r9 +; P8BE-NEXT: srwi r5, r5, 31 +; P8BE-NEXT: mulli r12, r8, 95 +; P8BE-NEXT: add r10, r0, r10 +; P8BE-NEXT: add r5, r11, r5 +; P8BE-NEXT: mulli r0, r9, 95 +; P8BE-NEXT: sldi r9, r9, 48 +; P8BE-NEXT: sldi r8, r8, 48 +; P8BE-NEXT: mtvsrd v3, r9 +; P8BE-NEXT: mulli r9, r5, 95 +; P8BE-NEXT: mtvsrd v2, r8 +; P8BE-NEXT: mulli r8, r10, 95 +; P8BE-NEXT: sldi r10, r10, 48 +; P8BE-NEXT: subf r3, r12, r3 +; P8BE-NEXT: mtvsrd v4, r10 +; P8BE-NEXT: subf r4, r0, r4 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: subf r3, r9, r6 +; P8BE-NEXT: subf r7, r8, r7 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: sldi r6, r7, 48 +; P8BE-NEXT: mtvsrd v1, r3 +; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: mtvsrd v0, r6 +; P8BE-NEXT: vmrghh v3, v5, v3 +; P8BE-NEXT: mtvsrd v5, r3 +; P8BE-NEXT: vmrghh v0, v1, v0 +; P8BE-NEXT: vmrghh v4, v5, v4 +; P8BE-NEXT: vmrghw v3, v0, v3 +; P8BE-NEXT: vmrghw v2, v4, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + %2 = sdiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { +; P9LE-LABEL: dont_fold_srem_power_of_two: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: addze r4, r4 +; P9LE-NEXT: slwi r4, r4, 6 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 5 +; P9LE-NEXT: addze r4, r4 +; P9LE-NEXT: slwi r4, r4, 5 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -21386 +; P9LE-NEXT: ori r5, r5, 37253 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 3 +; P9LE-NEXT: addze r4, r4 +; P9LE-NEXT: slwi r4, r4, 3 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_srem_power_of_two: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: srawi r4, r3, 5 +; P9BE-NEXT: addze r4, r4 +; P9BE-NEXT: slwi r4, r4, 5 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: srawi r4, r3, 6 +; P9BE-NEXT: addze r4, r4 +; P9BE-NEXT: slwi r4, r4, 6 +; P9BE-NEXT: 
subf r3, r4, r3 +; P9BE-NEXT: lis r4, -21386 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 37253 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: srawi r4, r3, 3 +; P9BE-NEXT: addze r4, r4 +; P9BE-NEXT: slwi r4, r4, 3 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_srem_power_of_two: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r3, -21386 +; P8LE-NEXT: ori r3, r3, 37253 +; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: rldicl r5, r4, 16, 48 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: extsh r6, r5 +; P8LE-NEXT: extsh r8, r7 +; P8LE-NEXT: extsw r6, r6 +; P8LE-NEXT: rldicl r9, r4, 48, 48 +; P8LE-NEXT: mulld r3, r6, r3 +; P8LE-NEXT: srawi r8, r8, 6 +; P8LE-NEXT: extsh r10, r9 +; P8LE-NEXT: addze r8, r8 +; P8LE-NEXT: rldicl r4, r4, 32, 48 +; P8LE-NEXT: srawi r10, r10, 5 +; P8LE-NEXT: slwi r8, r8, 6 +; P8LE-NEXT: subf r7, r8, r7 +; P8LE-NEXT: rldicl r3, r3, 32, 32 +; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: add r3, r3, r6 +; P8LE-NEXT: addze r6, r10 +; P8LE-NEXT: srwi r10, r3, 31 +; P8LE-NEXT: srawi r3, r3, 6 +; P8LE-NEXT: slwi r6, r6, 5 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: add r3, r3, r10 +; P8LE-NEXT: extsh r10, r4 +; P8LE-NEXT: subf r6, r6, r9 +; P8LE-NEXT: mulli r3, r3, 95 +; P8LE-NEXT: srawi r8, r10, 3 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: addze r7, r8 +; P8LE-NEXT: 
xxswapd v3, vs1 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: slwi r5, r7, 3 +; P8LE-NEXT: subf r4, r5, r4 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_srem_power_of_two: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -21386 +; P8BE-NEXT: ori r3, r3, 37253 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 32, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: mulld r3, r5, r3 +; P8BE-NEXT: srawi r8, r6, 5 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: addze r8, r8 +; P8BE-NEXT: rldicl r4, r4, 48, 48 +; P8BE-NEXT: srawi r9, r7, 6 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: slwi r8, r8, 5 +; P8BE-NEXT: addze r9, r9 +; P8BE-NEXT: subf r6, r8, r6 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: slwi r8, r9, 6 +; P8BE-NEXT: add r3, r3, r5 +; P8BE-NEXT: subf r7, r8, r7 +; P8BE-NEXT: srwi r10, r3, 31 +; P8BE-NEXT: srawi r3, r3, 6 +; P8BE-NEXT: add r3, r3, r10 +; P8BE-NEXT: srawi r9, r4, 3 +; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: addze r8, r9 +; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: slwi r6, r8, 3 +; P8BE-NEXT: subf r4, r6, r4 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: sldi r5, r7, 48 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v3, r5 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. 
+define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { +; P9LE-LABEL: dont_fold_srem_one: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: xxlxor v4, v4, v4 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 9 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, -19946 +; P9LE-NEXT: mulli r4, r4, 654 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 17097 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 4 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 24749 +; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 47143 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: srawi r4, r4, 11 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, 5423 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v3, v4 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_srem_one: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: 
srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 4 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 47143 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: srawi r4, r4, 11 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 5423 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, -14230 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 30865 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 9 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v3, v4 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_srem_one: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r3, 24749 +; P8LE-NEXT: lis r8, -19946 +; P8LE-NEXT: lis r10, -14230 +; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: ori r3, r3, 47143 +; P8LE-NEXT: ori r8, r8, 17097 +; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: rldicl r5, r4, 16, 48 +; P8LE-NEXT: rldicl r6, r4, 32, 48 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: extsh r7, r5 +; P8LE-NEXT: extsh r9, r6 +; P8LE-NEXT: extsw r7, r7 +; P8LE-NEXT: extsh r11, r4 +; P8LE-NEXT: extsw r9, r9 +; P8LE-NEXT: mulld r3, r7, r3 +; P8LE-NEXT: ori r7, r10, 30865 +; P8LE-NEXT: extsw r10, r11 +; P8LE-NEXT: mulld r8, r9, r8 +; P8LE-NEXT: 
mulld r7, r10, r7 +; P8LE-NEXT: rldicl r11, r3, 1, 63 +; P8LE-NEXT: rldicl r3, r3, 32, 32 +; P8LE-NEXT: rldicl r8, r8, 32, 32 +; P8LE-NEXT: rldicl r7, r7, 32, 32 +; P8LE-NEXT: add r8, r8, r9 +; P8LE-NEXT: srawi r3, r3, 11 +; P8LE-NEXT: add r7, r7, r10 +; P8LE-NEXT: srwi r9, r8, 31 +; P8LE-NEXT: srawi r8, r8, 4 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: add r8, r8, r9 +; P8LE-NEXT: srwi r9, r7, 31 +; P8LE-NEXT: srawi r7, r7, 9 +; P8LE-NEXT: mulli r3, r3, 5423 +; P8LE-NEXT: add r7, r7, r9 +; P8LE-NEXT: mulli r8, r8, 23 +; P8LE-NEXT: mulli r7, r7, 654 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r3, r8, r6 +; P8LE-NEXT: subf r4, r7, r4 +; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: mtvsrd f2, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: vmrglh v2, v2, v3 +; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_srem_one: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 24749 +; P8BE-NEXT: lis r7, -19946 +; P8BE-NEXT: lis r8, -14230 +; P8BE-NEXT: ori r3, r3, 47143 +; P8BE-NEXT: ori r7, r7, 17097 +; P8BE-NEXT: ori r8, r8, 30865 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: extsw r6, r6 +; P8BE-NEXT: extsw r4, r4 +; P8BE-NEXT: mulld r3, r5, r3 +; P8BE-NEXT: mulld r7, r6, r7 +; P8BE-NEXT: mulld r8, r4, r8 +; P8BE-NEXT: rldicl r9, r3, 1, 63 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: rldicl r7, r7, 32, 32 +; P8BE-NEXT: rldicl r8, r8, 32, 32 +; P8BE-NEXT: srawi r3, r3, 11 +; P8BE-NEXT: add r7, r7, r6 +; P8BE-NEXT: add r8, r8, r4 +; P8BE-NEXT: add r3, r3, r9 +; P8BE-NEXT: srwi r9, r7, 31 +; P8BE-NEXT: srawi r7, r7, 4 +; P8BE-NEXT: mulli r3, r3, 5423 +; P8BE-NEXT: add r7, r7, r9 +; P8BE-NEXT: srwi r9, r8, 31 +; P8BE-NEXT: srawi r8, r8, 
9 +; P8BE-NEXT: mulli r7, r7, 23 +; P8BE-NEXT: add r8, r8, r9 +; P8BE-NEXT: li r9, 0 +; P8BE-NEXT: mulli r8, r8, 654 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: sldi r5, r9, 48 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: subf r5, r7, r6 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: subf r4, r8, r4 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: vmrghh v3, v4, v3 +; P8BE-NEXT: vmrghh v2, v2, v5 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^15. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; P9LE-LABEL: dont_fold_urem_i16_smax: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: lis r5, -19946 +; P9LE-NEXT: ori r5, r5, 17097 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: mulld r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: srwi r5, r4, 31 +; P9LE-NEXT: srawi r4, r4, 4 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 24749 +; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: extsw r4, r4 +; P9LE-NEXT: ori r5, r5, 47143 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: srawi r4, r4, 11 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, 5423 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: srawi r4, r4, 15 +; P9LE-NEXT: addze r4, r4 +; P9LE-NEXT: slwi r4, r4, 15 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: 
xxlxor v4, v4, v4 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_urem_i16_smax: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: lis r4, -19946 +; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: add r4, r4, r3 +; P9BE-NEXT: srwi r5, r4, 31 +; P9BE-NEXT: srawi r4, r4, 4 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: extsw r3, r3 +; P9BE-NEXT: ori r4, r4, 47143 +; P9BE-NEXT: mulld r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: srawi r4, r4, 11 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 5423 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: srawi r4, r3, 15 +; P9BE-NEXT: addze r4, r4 +; P9BE-NEXT: slwi r4, r4, 15 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v3, v4 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_urem_i16_smax: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r6, 24749 +; P8LE-NEXT: lis r7, -19946 +; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: ori r6, r6, 47143 +; P8LE-NEXT: ori r7, r7, 17097 +; P8LE-NEXT: mfvsrd r3, f0 +; P8LE-NEXT: rldicl r4, r3, 16, 48 +; P8LE-NEXT: rldicl r5, r3, 32, 48 +; P8LE-NEXT: extsh r8, r4 +; P8LE-NEXT: extsh r9, r5 +; P8LE-NEXT: extsw r8, r8 +; P8LE-NEXT: extsw r9, r9 
+; P8LE-NEXT: mulld r6, r8, r6 +; P8LE-NEXT: mulld r7, r9, r7 +; P8LE-NEXT: rldicl r3, r3, 48, 48 +; P8LE-NEXT: rldicl r8, r6, 32, 32 +; P8LE-NEXT: rldicl r7, r7, 32, 32 +; P8LE-NEXT: rldicl r6, r6, 1, 63 +; P8LE-NEXT: srawi r8, r8, 11 +; P8LE-NEXT: add r7, r7, r9 +; P8LE-NEXT: add r6, r8, r6 +; P8LE-NEXT: srwi r8, r7, 31 +; P8LE-NEXT: srawi r7, r7, 4 +; P8LE-NEXT: mulli r6, r6, 5423 +; P8LE-NEXT: add r7, r7, r8 +; P8LE-NEXT: extsh r8, r3 +; P8LE-NEXT: mulli r7, r7, 23 +; P8LE-NEXT: srawi r8, r8, 15 +; P8LE-NEXT: subf r4, r6, r4 +; P8LE-NEXT: addze r6, r8 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: slwi r4, r6, 15 +; P8LE-NEXT: subf r5, r7, r5 +; P8LE-NEXT: subf r3, r4, r3 +; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: vmrglh v2, v2, v3 +; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_urem_i16_smax: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 24749 +; P8BE-NEXT: lis r7, -19946 +; P8BE-NEXT: ori r3, r3, 47143 +; P8BE-NEXT: ori r7, r7, 17097 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: extsw r6, r6 +; P8BE-NEXT: mulld r3, r5, r3 +; P8BE-NEXT: mulld r7, r6, r7 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: rldicl r8, r3, 1, 63 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: rldicl r7, r7, 32, 32 +; P8BE-NEXT: srawi r3, r3, 11 +; P8BE-NEXT: add r7, r7, r6 +; P8BE-NEXT: add r3, r3, r8 +; P8BE-NEXT: srwi r8, r7, 31 +; P8BE-NEXT: srawi r7, r7, 4 +; P8BE-NEXT: mulli r3, r3, 5423 +; P8BE-NEXT: add r7, r7, r8 +; P8BE-NEXT: li r8, 0 +; P8BE-NEXT: mulli r7, r7, 23 +; P8BE-NEXT: srawi r9, r4, 15 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: sldi r5, r8, 48 +; P8BE-NEXT: addze r8, r9 +; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: subf r5, r7, r6 
+; P8BE-NEXT: slwi r6, r8, 15 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: subf r4, r6, r4 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: vmrghh v3, v4, v3 +; P8BE-NEXT: vmrghh v2, v2, v5 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: blr + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 srem. +define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { +; P9LE-LABEL: dont_fold_srem_i64: +; P9LE: # %bb.0: +; P9LE-NEXT: lis r4, 24749 +; P9LE-NEXT: ori r4, r4, 47142 +; P9LE-NEXT: sldi r4, r4, 32 +; P9LE-NEXT: oris r4, r4, 58853 +; P9LE-NEXT: mfvsrd r3, v3 +; P9LE-NEXT: ori r4, r4, 6055 +; P9LE-NEXT: mulhd r4, r3, r4 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: sradi r4, r4, 11 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, -19946 +; P9LE-NEXT: mulli r4, r4, 5423 +; P9LE-NEXT: ori r5, r5, 17096 +; P9LE-NEXT: sldi r5, r5, 32 +; P9LE-NEXT: oris r5, r5, 22795 +; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: mfvsrld r4, v3 +; P9LE-NEXT: ori r5, r5, 8549 +; P9LE-NEXT: mulhd r5, r4, r5 +; P9LE-NEXT: add r5, r5, r4 +; P9LE-NEXT: rldicl r6, r5, 1, 63 +; P9LE-NEXT: sradi r5, r5, 4 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 23 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v3, r3, r4 +; P9LE-NEXT: lis r4, 25653 +; P9LE-NEXT: ori r4, r4, 15432 +; P9LE-NEXT: sldi r4, r4, 32 +; P9LE-NEXT: oris r4, r4, 1603 +; P9LE-NEXT: mfvsrd r3, v2 +; P9LE-NEXT: ori r4, r4, 21445 +; P9LE-NEXT: mulhd r4, r3, r4 +; P9LE-NEXT: rldicl r5, r4, 1, 63 +; P9LE-NEXT: sradi r4, r4, 8 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: mulli r4, r4, 654 +; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_srem_i64: +; P9BE: # %bb.0: +; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: ori r4, r4, 47142 +; P9BE-NEXT: sldi r4, r4, 32 +; P9BE-NEXT: oris r4, r4, 58853 +; P9BE-NEXT: mfvsrld r3, v3 +; P9BE-NEXT: ori 
r4, r4, 6055 +; P9BE-NEXT: mulhd r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: sradi r4, r4, 11 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, -19946 +; P9BE-NEXT: ori r5, r5, 17096 +; P9BE-NEXT: mulli r4, r4, 5423 +; P9BE-NEXT: sldi r5, r5, 32 +; P9BE-NEXT: oris r5, r5, 22795 +; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: mfvsrd r4, v3 +; P9BE-NEXT: ori r5, r5, 8549 +; P9BE-NEXT: mulhd r5, r4, r5 +; P9BE-NEXT: add r5, r5, r4 +; P9BE-NEXT: rldicl r6, r5, 1, 63 +; P9BE-NEXT: sradi r5, r5, 4 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 23 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v3, r4, r3 +; P9BE-NEXT: lis r4, 25653 +; P9BE-NEXT: ori r4, r4, 15432 +; P9BE-NEXT: sldi r4, r4, 32 +; P9BE-NEXT: oris r4, r4, 1603 +; P9BE-NEXT: mfvsrld r3, v2 +; P9BE-NEXT: ori r4, r4, 21445 +; P9BE-NEXT: mulhd r4, r3, r4 +; P9BE-NEXT: rldicl r5, r4, 1, 63 +; P9BE-NEXT: sradi r4, r4, 8 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: mtvsrdd v2, 0, r3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_srem_i64: +; P8LE: # %bb.0: +; P8LE-NEXT: lis r3, 24749 +; P8LE-NEXT: lis r4, -19946 +; P8LE-NEXT: lis r5, 25653 +; P8LE-NEXT: xxswapd vs0, v3 +; P8LE-NEXT: mfvsrd r6, v3 +; P8LE-NEXT: ori r3, r3, 47142 +; P8LE-NEXT: ori r4, r4, 17096 +; P8LE-NEXT: ori r5, r5, 15432 +; P8LE-NEXT: mfvsrd r7, v2 +; P8LE-NEXT: sldi r3, r3, 32 +; P8LE-NEXT: sldi r4, r4, 32 +; P8LE-NEXT: sldi r5, r5, 32 +; P8LE-NEXT: oris r3, r3, 58853 +; P8LE-NEXT: oris r4, r4, 22795 +; P8LE-NEXT: mfvsrd r8, f0 +; P8LE-NEXT: oris r5, r5, 1603 +; P8LE-NEXT: ori r3, r3, 6055 +; P8LE-NEXT: ori r4, r4, 8549 +; P8LE-NEXT: ori r5, r5, 21445 +; P8LE-NEXT: mulhd r3, r6, r3 +; P8LE-NEXT: mulhd r5, r7, r5 +; P8LE-NEXT: mulhd r4, r8, r4 +; P8LE-NEXT: rldicl r9, r3, 1, 63 +; P8LE-NEXT: sradi r3, r3, 11 +; P8LE-NEXT: add r3, r3, r9 +; P8LE-NEXT: rldicl r9, r5, 1, 63 +; P8LE-NEXT: add r4, r4, r8 +; P8LE-NEXT: sradi r5, r5, 8 +; P8LE-NEXT: mulli r3, r3, 5423 
+; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: rldicl r9, r4, 1, 63 +; P8LE-NEXT: sradi r4, r4, 4 +; P8LE-NEXT: mulli r5, r5, 654 +; P8LE-NEXT: add r4, r4, r9 +; P8LE-NEXT: mulli r4, r4, 23 +; P8LE-NEXT: sub r3, r6, r3 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: sub r5, r7, r5 +; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: sub r3, r8, r4 +; P8LE-NEXT: li r4, 0 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxmrghd v3, vs0, vs2 +; P8LE-NEXT: xxmrghd v2, vs1, vs3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_srem_i64: +; P8BE: # %bb.0: +; P8BE-NEXT: lis r4, -19946 +; P8BE-NEXT: lis r3, 24749 +; P8BE-NEXT: xxswapd vs0, v3 +; P8BE-NEXT: lis r5, 25653 +; P8BE-NEXT: xxswapd vs1, v2 +; P8BE-NEXT: ori r4, r4, 17096 +; P8BE-NEXT: ori r3, r3, 47142 +; P8BE-NEXT: ori r5, r5, 15432 +; P8BE-NEXT: mfvsrd r6, v3 +; P8BE-NEXT: sldi r4, r4, 32 +; P8BE-NEXT: sldi r3, r3, 32 +; P8BE-NEXT: oris r4, r4, 22795 +; P8BE-NEXT: sldi r5, r5, 32 +; P8BE-NEXT: oris r3, r3, 58853 +; P8BE-NEXT: mfvsrd r7, f0 +; P8BE-NEXT: ori r4, r4, 8549 +; P8BE-NEXT: ori r3, r3, 6055 +; P8BE-NEXT: oris r5, r5, 1603 +; P8BE-NEXT: mfvsrd r8, f1 +; P8BE-NEXT: mulhd r4, r6, r4 +; P8BE-NEXT: mulhd r3, r7, r3 +; P8BE-NEXT: ori r5, r5, 21445 +; P8BE-NEXT: mulhd r5, r8, r5 +; P8BE-NEXT: add r4, r4, r6 +; P8BE-NEXT: rldicl r9, r3, 1, 63 +; P8BE-NEXT: sradi r3, r3, 11 +; P8BE-NEXT: rldicl r10, r4, 1, 63 +; P8BE-NEXT: sradi r4, r4, 4 +; P8BE-NEXT: add r3, r3, r9 +; P8BE-NEXT: rldicl r9, r5, 1, 63 +; P8BE-NEXT: add r4, r4, r10 +; P8BE-NEXT: sradi r5, r5, 8 +; P8BE-NEXT: mulli r3, r3, 5423 +; P8BE-NEXT: add r5, r5, r9 +; P8BE-NEXT: mulli r4, r4, 23 +; P8BE-NEXT: mulli r5, r5, 654 +; P8BE-NEXT: sub r3, r7, r3 +; P8BE-NEXT: sub r4, r6, r4 +; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: sub r3, r8, r5 +; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: li r4, 0 +; P8BE-NEXT: mtvsrd f2, r3 +; P8BE-NEXT: mtvsrd f3, r4 +; P8BE-NEXT: xxmrghd v3, vs1, vs0 +; P8BE-NEXT: xxmrghd v2, vs3, vs2 +; P8BE-NEXT: blr + %1 = srem <4 x i64> %x, + 
ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/PowerPC/urem-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-lkk.ll new file mode 100644 index 0000000000000..f361200d54fda --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/urem-lkk.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc64 < %s | FileCheck -check-prefixes=CHECK,CHECK64 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s | FileCheck -check-prefixes=CHECK,CHECK32 %s + +define i32 @fold_urem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_urem_positive_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 22765 +; CHECK-NEXT: ori 4, 4, 8969 +; CHECK-NEXT: mulhwu 4, 3, 4 +; CHECK-NEXT: subf 5, 4, 3 +; CHECK-NEXT: srwi 5, 5, 1 +; CHECK-NEXT: add 4, 5, 4 +; CHECK-NEXT: srwi 4, 4, 6 +; CHECK-NEXT: mulli 4, 4, 95 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = urem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_urem_positive_even(i32 %x) { +; CHECK-LABEL: fold_urem_positive_even: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, -2226 +; CHECK-NEXT: ori 4, 4, 16323 +; CHECK-NEXT: mulhwu 4, 3, 4 +; CHECK-NEXT: srwi 4, 4, 10 +; CHECK-NEXT: mulli 4, 4, 1060 +; CHECK-NEXT: subf 3, 4, 3 +; CHECK-NEXT: blr + %1 = urem i32 %x, 1060 + ret i32 %1 +} + + +; Don't fold if we can combine urem with udiv. +define i32 @combine_urem_udiv(i32 %x) { +; CHECK-LABEL: combine_urem_udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 4, 22765 +; CHECK-NEXT: ori 4, 4, 8969 +; CHECK-NEXT: mulhwu 4, 3, 4 +; CHECK-NEXT: subf 5, 4, 3 +; CHECK-NEXT: srwi 5, 5, 1 +; CHECK-NEXT: add 4, 5, 4 +; CHECK-NEXT: srwi 4, 4, 6 +; CHECK-NEXT: mulli 5, 4, 95 +; CHECK-NEXT: subf 3, 5, 3 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr + %1 = urem i32 %x, 95 + %2 = udiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. 
+define i32 @dont_fold_urem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_urem_power_of_two: +; CHECK: # %bb.0: +; CHECK-NEXT: clrlwi 3, 3, 26 +; CHECK-NEXT: blr + %1 = urem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_urem_one(i32 %x) { +; CHECK-LABEL: dont_fold_urem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + %1 = urem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^32. +define i32 @dont_fold_urem_i32_umax(i32 %x) { +; CHECK-LABEL: dont_fold_urem_i32_umax: +; CHECK: # %bb.0: +; CHECK-NEXT: blr + %1 = urem i32 %x, 4294967296 + ret i32 %1 +} + +; Don't fold i64 urem +define i64 @dont_fold_urem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_urem_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 6, 98 +; CHECK-NEXT: bl __umoddi3@PLT +; CHECK-NEXT: lwz 0, 20(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %1 = urem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll new file mode 100644 index 0000000000000..e3d9027d9e98c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -0,0 +1,1338 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s 
--check-prefixes=CHECK,P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE + +define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { +; P9LE-LABEL: fold_urem_vec_1: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: lis r5, 21399 +; P9LE-NEXT: ori r5, r5, 33437 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: lis r5, 16727 +; P9LE-NEXT: ori r5, r5, 2287 +; P9LE-NEXT: rldicl r4, r4, 27, 37 +; P9LE-NEXT: mulli r4, r4, 98 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: ori r5, r5, 16913 +; P9LE-NEXT: rldicl r4, r4, 24, 40 +; P9LE-NEXT: mulli r4, r4, 1003 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r4, r4, 30, 34 +; P9LE-NEXT: mulli r4, r4, 124 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: lis r6, 22765 +; P9LE-NEXT: ori r6, r6, 8969 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: blr +; +; P9BE-LABEL: fold_urem_vec_1: +; P9BE: # %bb.0: 
+; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: lis r5, 16727 +; P9BE-NEXT: ori r5, r5, 2287 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: lis r5, 21399 +; P9BE-NEXT: ori r5, r5, 33437 +; P9BE-NEXT: rldicl r4, r4, 24, 40 +; P9BE-NEXT: mulli r4, r4, 1003 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: ori r5, r5, 16913 +; P9BE-NEXT: rldicl r4, r4, 27, 37 +; P9BE-NEXT: mulli r4, r4, 98 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: clrlwi r4, r3, 16 +; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 +; P9BE-NEXT: mulld r3, r3, r5 +; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: ori r5, r5, 8969 +; P9BE-NEXT: rldicl r3, r3, 30, 34 +; P9BE-NEXT: mulli r3, r3, 124 +; P9BE-NEXT: subf r3, r3, r4 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r5, r4, r3 +; P9BE-NEXT: srwi r5, r5, 1 +; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: fold_urem_vec_1: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r3, 22765 +; P8LE-NEXT: lis r8, 21399 +; P8LE-NEXT: ori r3, r3, 8969 +; P8LE-NEXT: ori r8, r8, 33437 +; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: clrldi r5, r4, 48 +; 
P8LE-NEXT: rldicl r9, r4, 32, 48 +; P8LE-NEXT: rlwinm r6, r5, 0, 16, 31 +; P8LE-NEXT: rldicl r10, r4, 16, 48 +; P8LE-NEXT: rlwinm r11, r9, 0, 16, 31 +; P8LE-NEXT: clrldi r7, r6, 32 +; P8LE-NEXT: rlwinm r12, r10, 0, 16, 31 +; P8LE-NEXT: mulld r3, r7, r3 +; P8LE-NEXT: lis r7, 16727 +; P8LE-NEXT: ori r7, r7, 2287 +; P8LE-NEXT: mulld r8, r11, r8 +; P8LE-NEXT: lis r11, 8456 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: mulld r7, r12, r7 +; P8LE-NEXT: ori r11, r11, 16913 +; P8LE-NEXT: rlwinm r12, r4, 30, 18, 31 +; P8LE-NEXT: rldicl r3, r3, 32, 32 +; P8LE-NEXT: mulld r11, r12, r11 +; P8LE-NEXT: subf r6, r3, r6 +; P8LE-NEXT: rldicl r8, r8, 27, 37 +; P8LE-NEXT: srwi r6, r6, 1 +; P8LE-NEXT: add r3, r6, r3 +; P8LE-NEXT: rldicl r6, r7, 24, 40 +; P8LE-NEXT: mulli r7, r8, 98 +; P8LE-NEXT: srwi r3, r3, 6 +; P8LE-NEXT: rldicl r8, r11, 30, 34 +; P8LE-NEXT: mulli r6, r6, 1003 +; P8LE-NEXT: mulli r3, r3, 95 +; P8LE-NEXT: mulli r8, r8, 124 +; P8LE-NEXT: subf r7, r7, r9 +; P8LE-NEXT: subf r6, r6, r10 +; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: subf r4, r8, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: fold_urem_vec_1: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: lis r9, 16727 +; P8BE-NEXT: ori r3, r3, 8969 +; P8BE-NEXT: ori r9, r9, 2287 +; P8BE-NEXT: rldicl r5, r4, 16, 48 +; P8BE-NEXT: clrldi r6, r4, 48 +; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: rldicl r7, r4, 48, 48 +; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrldi r8, r5, 32 +; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 +; P8BE-NEXT: mulld r3, r8, r3 +; P8BE-NEXT: lis r8, 21399 +; P8BE-NEXT: clrldi r10, r6, 32 +; P8BE-NEXT: ori r8, r8, 33437 +; P8BE-NEXT: 
clrldi r11, r7, 32 +; P8BE-NEXT: mulld r9, r10, r9 +; P8BE-NEXT: lis r10, 8456 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: mulld r8, r11, r8 +; P8BE-NEXT: ori r10, r10, 16913 +; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: mulld r10, r11, r10 +; P8BE-NEXT: subf r11, r3, r5 +; P8BE-NEXT: srwi r11, r11, 1 +; P8BE-NEXT: rldicl r9, r9, 24, 40 +; P8BE-NEXT: add r3, r11, r3 +; P8BE-NEXT: rldicl r8, r8, 27, 37 +; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: mulli r9, r9, 1003 +; P8BE-NEXT: rldicl r10, r10, 30, 34 +; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: mulli r10, r10, 124 +; P8BE-NEXT: subf r6, r9, r6 +; P8BE-NEXT: subf r7, r8, r7 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: subf r4, r10, r4 +; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: sldi r5, r7, 48 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v3, r5 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: vmrghh v3, v4, v5 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { +; P9LE-LABEL: fold_urem_vec_2: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: lis r6, 22765 +; P9LE-NEXT: ori r6, r6, 8969 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; 
P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: fold_urem_vec_2: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: ori r5, r5, 8969 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r4, r6, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: srwi r6, 
r6, 1 +; P9BE-NEXT: add r4, r6, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r4, r6, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r5, r4, r3 +; P9BE-NEXT: srwi r5, r5, 1 +; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: fold_urem_vec_2: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r4, 22765 +; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: ori r4, r4, 8969 +; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: clrldi r3, r5, 48 +; P8LE-NEXT: rldicl r6, r5, 48, 48 +; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P8LE-NEXT: rldicl r7, r5, 32, 48 +; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31 +; P8LE-NEXT: rldicl r5, r5, 16, 48 +; P8LE-NEXT: clrldi r11, r8, 32 +; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 +; P8LE-NEXT: rlwinm r12, r5, 0, 16, 31 +; P8LE-NEXT: mulld r11, r11, r4 +; P8LE-NEXT: clrldi r0, r9, 32 +; P8LE-NEXT: clrldi r30, r10, 32 +; P8LE-NEXT: clrldi r29, r12, 32 +; P8LE-NEXT: mulld r0, r0, r4 +; P8LE-NEXT: mulld r30, r30, r4 +; P8LE-NEXT: mulld r4, r29, r4 +; 
P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; P8LE-NEXT: rldicl r11, r11, 32, 32 +; P8LE-NEXT: subf r8, r11, r8 +; P8LE-NEXT: rldicl r0, r0, 32, 32 +; P8LE-NEXT: srwi r8, r8, 1 +; P8LE-NEXT: rldicl r30, r30, 32, 32 +; P8LE-NEXT: rldicl r4, r4, 32, 32 +; P8LE-NEXT: subf r9, r0, r9 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: subf r10, r30, r10 +; P8LE-NEXT: subf r11, r4, r12 +; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: srwi r8, r8, 6 +; P8LE-NEXT: srwi r10, r10, 1 +; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: add r10, r10, r30 +; P8LE-NEXT: add r4, r11, r4 +; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: mulli r8, r8, 95 +; P8LE-NEXT: srwi r10, r10, 6 +; P8LE-NEXT: srwi r4, r4, 6 +; P8LE-NEXT: mulli r9, r9, 95 +; P8LE-NEXT: mulli r10, r10, 95 +; P8LE-NEXT: mulli r4, r4, 95 +; P8LE-NEXT: subf r3, r8, r3 +; P8LE-NEXT: subf r6, r9, r6 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r3, r10, r7 +; P8LE-NEXT: subf r4, r4, r5 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: fold_urem_vec_2: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: ori r3, r3, 8969 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrldi r8, r5, 32 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 +; P8BE-NEXT: clrldi r9, r6, 32 +; P8BE-NEXT: mulld r8, r8, r3 +; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: clrldi r10, r7, 32 +; P8BE-NEXT: mulld r9, r9, r3 +; P8BE-NEXT: clrldi r11, r4, 32 +; P8BE-NEXT: mulld r10, r10, r3 +; 
P8BE-NEXT: mulld r3, r11, r3 +; P8BE-NEXT: rldicl r8, r8, 32, 32 +; P8BE-NEXT: rldicl r9, r9, 32, 32 +; P8BE-NEXT: subf r11, r8, r5 +; P8BE-NEXT: rldicl r10, r10, 32, 32 +; P8BE-NEXT: subf r12, r9, r6 +; P8BE-NEXT: srwi r11, r11, 1 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: subf r11, r10, r7 +; P8BE-NEXT: srwi r12, r12, 1 +; P8BE-NEXT: add r9, r12, r9 +; P8BE-NEXT: subf r12, r3, r4 +; P8BE-NEXT: srwi r11, r11, 1 +; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: add r10, r11, r10 +; P8BE-NEXT: srwi r11, r12, 1 +; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: add r3, r11, r3 +; P8BE-NEXT: srwi r10, r10, 6 +; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: mulli r8, r8, 95 +; P8BE-NEXT: mulli r9, r9, 95 +; P8BE-NEXT: mulli r10, r10, 95 +; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: subf r5, r8, r5 +; P8BE-NEXT: subf r6, r9, r6 +; P8BE-NEXT: subf r7, r10, r7 +; P8BE-NEXT: subf r3, r3, r4 +; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: sldi r4, r7, 48 +; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v3, r6 +; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: mtvsrd v5, r3 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine urem with udiv. 
+define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { +; P9LE-LABEL: combine_urem_udiv: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: lis r6, 22765 +; P9LE-NEXT: ori r6, r6, 8969 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r5, r4, 95 +; P9LE-NEXT: subf r3, r5, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r5, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r7, r5, 32 +; P9LE-NEXT: mulld r7, r7, r6 +; P9LE-NEXT: rldicl r7, r7, 32, 32 +; P9LE-NEXT: subf r5, r7, r5 +; P9LE-NEXT: srwi r5, r5, 1 +; P9LE-NEXT: add r5, r5, r7 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r7, r5, 95 +; P9LE-NEXT: subf r3, r7, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r7, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r8, r7, 32 +; P9LE-NEXT: mulld r8, r8, r6 +; P9LE-NEXT: rldicl r8, r8, 32, 32 +; P9LE-NEXT: subf r7, r8, r7 +; P9LE-NEXT: srwi r7, r7, 1 +; P9LE-NEXT: add r7, r7, r8 +; P9LE-NEXT: srwi r7, r7, 6 +; P9LE-NEXT: mulli r8, r7, 95 +; P9LE-NEXT: subf r3, r8, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P9LE-NEXT: clrldi r9, r8, 32 +; P9LE-NEXT: mulld r6, r9, r6 +; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: subf r8, r6, r8 +; P9LE-NEXT: srwi r8, r8, 1 +; P9LE-NEXT: add r6, r8, r6 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: mulli r8, r6, 95 +; P9LE-NEXT: subf r3, r8, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: mtvsrd f0, r4 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: 
vmrglw v2, v2, v3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r5 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r7 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r6 +; P9LE-NEXT: xxswapd v5, vs0 +; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: vadduhm v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: combine_urem_udiv: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9BE-NEXT: lis r6, 22765 +; P9BE-NEXT: ori r6, r6, 8969 +; P9BE-NEXT: clrldi r5, r4, 32 +; P9BE-NEXT: mulld r5, r5, r6 +; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: subf r4, r5, r4 +; P9BE-NEXT: srwi r4, r4, 1 +; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r5, r4, 95 +; P9BE-NEXT: subf r3, r5, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r5, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r7, r5, 32 +; P9BE-NEXT: mulld r7, r7, r6 +; P9BE-NEXT: rldicl r7, r7, 32, 32 +; P9BE-NEXT: subf r5, r7, r5 +; P9BE-NEXT: srwi r5, r5, 1 +; P9BE-NEXT: add r5, r5, r7 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r7, r5, 95 +; P9BE-NEXT: subf r3, r7, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r7, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r8, r7, 32 +; P9BE-NEXT: mulld r8, r8, r6 +; P9BE-NEXT: rldicl r8, r8, 32, 32 +; P9BE-NEXT: subf r7, r8, r7 +; P9BE-NEXT: srwi r7, r7, 1 +; P9BE-NEXT: add r7, r7, r8 +; P9BE-NEXT: srwi r7, r7, 6 +; P9BE-NEXT: mulli r8, r7, 95 +; P9BE-NEXT: subf r3, r8, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r8, r3, 32 +; P9BE-NEXT: mulld r6, r8, r6 +; P9BE-NEXT: rldicl r6, r6, 32, 
32 +; P9BE-NEXT: subf r8, r6, r3 +; P9BE-NEXT: srwi r8, r8, 1 +; P9BE-NEXT: add r6, r8, r6 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: mulli r8, r6, 95 +; P9BE-NEXT: subf r3, r8, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: sldi r3, r4, 48 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: sldi r3, r5, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: sldi r3, r7, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: sldi r3, r6, 48 +; P9BE-NEXT: mtvsrd v5, r3 +; P9BE-NEXT: vmrghh v4, v5, v4 +; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: vadduhm v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: combine_urem_udiv: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r5, 22765 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; P8LE-NEXT: ori r5, r5, 8969 +; P8LE-NEXT: mfvsrd r6, f0 +; P8LE-NEXT: clrldi r3, r6, 48 +; P8LE-NEXT: rldicl r4, r6, 48, 48 +; P8LE-NEXT: rldicl r7, r6, 32, 48 +; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P8LE-NEXT: rlwinm r9, r4, 0, 16, 31 +; P8LE-NEXT: rldicl r6, r6, 16, 48 +; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 +; P8LE-NEXT: clrldi r11, r8, 32 +; P8LE-NEXT: rlwinm r12, r6, 0, 16, 31 +; P8LE-NEXT: clrldi r0, r9, 32 +; P8LE-NEXT: clrldi r30, r10, 32 +; P8LE-NEXT: mulld r11, r11, r5 +; P8LE-NEXT: clrldi r29, r12, 32 +; P8LE-NEXT: mulld r0, r0, r5 +; P8LE-NEXT: mulld r30, r30, r5 +; P8LE-NEXT: mulld r5, r29, r5 +; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; P8LE-NEXT: rldicl r11, r11, 32, 32 +; P8LE-NEXT: rldicl r0, r0, 32, 32 +; P8LE-NEXT: rldicl r30, r30, 32, 32 +; P8LE-NEXT: subf r8, r11, r8 +; P8LE-NEXT: rldicl r5, r5, 32, 32 +; P8LE-NEXT: subf r9, r0, r9 +; P8LE-NEXT: srwi r8, r8, 1 +; P8LE-NEXT: subf r10, r30, r10 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: srwi r10, r10, 1 +; P8LE-NEXT: subf r11, r5, r12 +; P8LE-NEXT: add r9, r9, r0 +; 
P8LE-NEXT: srwi r8, r8, 6 +; P8LE-NEXT: add r10, r10, r30 +; P8LE-NEXT: srwi r11, r11, 1 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: mulli r12, r8, 95 +; P8LE-NEXT: srwi r10, r10, 6 +; P8LE-NEXT: add r5, r11, r5 +; P8LE-NEXT: mtvsrd f0, r8 +; P8LE-NEXT: mulli r8, r9, 95 +; P8LE-NEXT: mtvsrd f1, r9 +; P8LE-NEXT: mulli r9, r10, 95 +; P8LE-NEXT: srwi r5, r5, 6 +; P8LE-NEXT: mtvsrd f3, r5 +; P8LE-NEXT: mulli r5, r5, 95 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: mtvsrd f2, r10 +; P8LE-NEXT: subf r3, r12, r3 +; P8LE-NEXT: xxswapd v6, vs3 +; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: subf r3, r9, r7 +; P8LE-NEXT: subf r4, r8, r4 +; P8LE-NEXT: xxswapd v1, vs2 +; P8LE-NEXT: mtvsrd f4, r3 +; P8LE-NEXT: subf r3, r5, r6 +; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtvsrd f5, r3 +; P8LE-NEXT: xxswapd v5, vs4 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: xxswapd v4, vs1 +; P8LE-NEXT: xxswapd v0, vs5 +; P8LE-NEXT: vmrglh v3, v4, v3 +; P8LE-NEXT: vmrglh v4, v0, v5 +; P8LE-NEXT: vmrglh v5, v6, v1 +; P8LE-NEXT: vmrglw v3, v4, v3 +; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: vadduhm v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: combine_urem_udiv: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r6, v2 +; P8BE-NEXT: lis r5, 22765 +; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8BE-NEXT: ori r5, r5, 8969 +; P8BE-NEXT: clrldi r3, r6, 48 +; P8BE-NEXT: rldicl r4, r6, 48, 48 +; P8BE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P8BE-NEXT: rldicl r7, r6, 32, 48 +; P8BE-NEXT: rlwinm r9, r4, 0, 16, 31 +; P8BE-NEXT: rldicl r6, r6, 16, 48 +; P8BE-NEXT: clrldi r11, r8, 32 +; P8BE-NEXT: rlwinm r10, r7, 0, 16, 31 +; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrldi r12, r9, 32 +; P8BE-NEXT: mulld r11, r11, r5 +; P8BE-NEXT: clrldi r0, r10, 32 +; P8BE-NEXT: clrldi r30, r6, 32 +; P8BE-NEXT: mulld r12, r12, r5 +; P8BE-NEXT: mulld r0, r0, r5 +; P8BE-NEXT: mulld r5, r30, r5 +; P8BE-NEXT: ld r30, -16(r1) 
# 8-byte Folded Reload +; P8BE-NEXT: rldicl r11, r11, 32, 32 +; P8BE-NEXT: rldicl r12, r12, 32, 32 +; P8BE-NEXT: subf r8, r11, r8 +; P8BE-NEXT: rldicl r5, r5, 32, 32 +; P8BE-NEXT: subf r9, r12, r9 +; P8BE-NEXT: srwi r8, r8, 1 +; P8BE-NEXT: rldicl r0, r0, 32, 32 +; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: srwi r9, r9, 1 +; P8BE-NEXT: subf r11, r5, r6 +; P8BE-NEXT: subf r10, r0, r10 +; P8BE-NEXT: add r9, r9, r12 +; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: srwi r11, r11, 1 +; P8BE-NEXT: srwi r10, r10, 1 +; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: add r5, r11, r5 +; P8BE-NEXT: mulli r12, r8, 95 +; P8BE-NEXT: add r10, r10, r0 +; P8BE-NEXT: srwi r5, r5, 6 +; P8BE-NEXT: mulli r11, r9, 95 +; P8BE-NEXT: sldi r9, r9, 48 +; P8BE-NEXT: srwi r10, r10, 6 +; P8BE-NEXT: sldi r8, r8, 48 +; P8BE-NEXT: mtvsrd v3, r9 +; P8BE-NEXT: mulli r9, r5, 95 +; P8BE-NEXT: mtvsrd v2, r8 +; P8BE-NEXT: mulli r8, r10, 95 +; P8BE-NEXT: sldi r10, r10, 48 +; P8BE-NEXT: subf r3, r12, r3 +; P8BE-NEXT: mtvsrd v4, r10 +; P8BE-NEXT: subf r4, r11, r4 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: subf r3, r9, r6 +; P8BE-NEXT: subf r7, r8, r7 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: sldi r6, r7, 48 +; P8BE-NEXT: mtvsrd v1, r3 +; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: mtvsrd v0, r6 +; P8BE-NEXT: vmrghh v3, v5, v3 +; P8BE-NEXT: mtvsrd v5, r3 +; P8BE-NEXT: vmrghh v0, v1, v0 +; P8BE-NEXT: vmrghh v4, v5, v4 +; P8BE-NEXT: vmrghw v3, v0, v3 +; P8BE-NEXT: vmrghw v2, v4, v2 +; P8BE-NEXT: vadduhm v2, v3, v2 +; P8BE-NEXT: blr + %1 = urem <4 x i16> %x, + %2 = udiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { +; P9LE-LABEL: dont_fold_urem_power_of_two: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r3, r3, 0, 26, 31 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r3, r3, 0, 27, 31 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: lis r6, 22765 +; P9LE-NEXT: ori r6, r6, 8969 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: clrldi r5, r4, 32 +; P9LE-NEXT: mulld r5, r5, r6 +; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: srwi r4, r4, 1 +; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r3, r3, 0, 29, 31 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_urem_power_of_two: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 27, 31 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 26, 31 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: ori r5, r5, 8969 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: subf r5, r4, r3 +; P9BE-NEXT: srwi r5, r5, 1 +; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: srwi r4, r4, 6 +; P9BE-NEXT: mulli r4, r4, 95 
+; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 29, 31 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_urem_power_of_two: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: lis r3, 22765 +; P8LE-NEXT: ori r3, r3, 8969 +; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: rldicl r5, r4, 16, 48 +; P8LE-NEXT: rlwinm r6, r5, 0, 16, 31 +; P8LE-NEXT: clrldi r7, r6, 32 +; P8LE-NEXT: mulld r3, r7, r3 +; P8LE-NEXT: rldicl r7, r4, 48, 48 +; P8LE-NEXT: rlwinm r7, r7, 0, 27, 31 +; P8LE-NEXT: mtvsrd f1, r7 +; P8LE-NEXT: rldicl r3, r3, 32, 32 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: subf r6, r3, r6 +; P8LE-NEXT: srwi r6, r6, 1 +; P8LE-NEXT: add r3, r6, r3 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: srwi r3, r3, 6 +; P8LE-NEXT: rldicl r4, r4, 32, 48 +; P8LE-NEXT: rlwinm r6, r6, 0, 26, 31 +; P8LE-NEXT: mulli r3, r3, 95 +; P8LE-NEXT: rlwinm r4, r4, 0, 29, 31 +; P8LE-NEXT: mtvsrd f0, r6 +; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: vmrglw v2, v3, v2 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_urem_power_of_two: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: ori r3, r3, 8969 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: rlwinm r7, r7, 0, 26, 31 +; P8BE-NEXT: clrldi r6, r5, 32 +; P8BE-NEXT: mulld r3, r6, r3 +; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: subf r6, r3, r5 +; P8BE-NEXT: srwi r6, r6, 1 +; P8BE-NEXT: add r3, r6, r3 +; P8BE-NEXT: rldicl r6, r4, 32, 48 +; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: rldicl r4, 
r4, 48, 48 +; P8BE-NEXT: rlwinm r6, r6, 0, 27, 31 +; P8BE-NEXT: mulli r3, r3, 95 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: rlwinm r4, r4, 0, 29, 31 +; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: sldi r6, r7, 48 +; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrd v3, r6 +; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: vmrghh v2, v3, v2 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: blr + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. +define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { +; P9LE-LABEL: dont_fold_urem_one: +; P9LE: # %bb.0: +; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: li r5, 0 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: oris r6, r5, 45590 +; P9LE-NEXT: oris r5, r5, 51306 +; P9LE-NEXT: ori r6, r6, 17097 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: mulld r4, r4, r6 +; P9LE-NEXT: lis r6, 24749 +; P9LE-NEXT: ori r6, r6, 47143 +; P9LE-NEXT: rldicl r4, r4, 28, 36 +; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: mulld r4, r4, r6 +; P9LE-NEXT: rldicl r4, r4, 21, 43 +; P9LE-NEXT: mulli r4, r4, 5423 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 +; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: rldicl r4, r4, 24, 40 +; P9LE-NEXT: mulli r4, r4, 654 +; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: xxswapd v4, vs0 +; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: xxswapd v2, vs0 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxlxor v4, v4, v4 +; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_urem_one: +; P9BE: # %bb.0: +; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: 
vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: lis r5, 24749 +; P9BE-NEXT: ori r5, r5, 47143 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r5 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: oris r6, r5, 45590 +; P9BE-NEXT: oris r5, r5, 51306 +; P9BE-NEXT: ori r6, r6, 17097 +; P9BE-NEXT: ori r5, r5, 30865 +; P9BE-NEXT: rldicl r4, r4, 21, 43 +; P9BE-NEXT: mulli r4, r4, 5423 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrldi r4, r3, 32 +; P9BE-NEXT: mulld r4, r4, r6 +; P9BE-NEXT: rldicl r4, r4, 28, 36 +; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: clrlwi r4, r3, 16 +; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 +; P9BE-NEXT: mulld r3, r3, r5 +; P9BE-NEXT: rldicl r3, r3, 24, 40 +; P9BE-NEXT: mulli r3, r3, 654 +; P9BE-NEXT: subf r3, r3, r4 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_urem_one: +; P8LE: # %bb.0: +; P8LE-NEXT: xxswapd vs0, v2 +; P8LE-NEXT: li r3, 0 +; P8LE-NEXT: lis r8, 24749 +; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: oris r5, r3, 45590 +; P8LE-NEXT: ori r8, r8, 47143 +; P8LE-NEXT: oris r3, r3, 51306 +; P8LE-NEXT: ori r5, r5, 17097 +; P8LE-NEXT: ori r3, r3, 30865 +; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: rldicl r6, r4, 32, 48 +; P8LE-NEXT: rldicl r7, r4, 16, 48 +; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: mulld r5, r9, r5 +; P8LE-NEXT: rlwinm r9, r7, 0, 16, 31 +; P8LE-NEXT: mulld r8, r9, r8 +; P8LE-NEXT: rlwinm r9, r4, 31, 17, 31 +; P8LE-NEXT: mulld r3, r9, r3 +; 
P8LE-NEXT: rldicl r5, r5, 28, 36 +; P8LE-NEXT: rldicl r8, r8, 21, 43 +; P8LE-NEXT: mulli r5, r5, 23 +; P8LE-NEXT: rldicl r3, r3, 24, 40 +; P8LE-NEXT: mulli r8, r8, 5423 +; P8LE-NEXT: mulli r3, r3, 654 +; P8LE-NEXT: subf r5, r5, r6 +; P8LE-NEXT: subf r6, r8, r7 +; P8LE-NEXT: mtvsrd f0, r5 +; P8LE-NEXT: subf r3, r3, r4 +; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: xxswapd v2, vs0 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v4, vs2 +; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_urem_one: +; P8BE: # %bb.0: +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: li r3, 0 +; P8BE-NEXT: lis r8, 24749 +; P8BE-NEXT: oris r6, r3, 51306 +; P8BE-NEXT: ori r8, r8, 47143 +; P8BE-NEXT: oris r3, r3, 45590 +; P8BE-NEXT: rldicl r5, r4, 32, 48 +; P8BE-NEXT: clrldi r7, r4, 48 +; P8BE-NEXT: ori r6, r6, 30865 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: rldicl r4, r4, 48, 48 +; P8BE-NEXT: rlwinm r9, r5, 31, 17, 31 +; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 +; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: mulld r6, r9, r6 +; P8BE-NEXT: clrldi r9, r7, 32 +; P8BE-NEXT: mulld r8, r9, r8 +; P8BE-NEXT: clrldi r9, r4, 32 +; P8BE-NEXT: mulld r3, r9, r3 +; P8BE-NEXT: li r9, 0 +; P8BE-NEXT: rldicl r6, r6, 24, 40 +; P8BE-NEXT: mulli r6, r6, 654 +; P8BE-NEXT: rldicl r8, r8, 21, 43 +; P8BE-NEXT: rldicl r3, r3, 28, 36 +; P8BE-NEXT: mulli r8, r8, 5423 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: subf r5, r6, r5 +; P8BE-NEXT: sldi r6, r9, 48 +; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: subf r6, r8, r7 +; P8BE-NEXT: mtvsrd v3, r5 +; P8BE-NEXT: subf r3, r3, r4 +; P8BE-NEXT: sldi r4, r6, 48 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: mtvsrd v5, r3 +; P8BE-NEXT: vmrghh v2, v2, v3 +; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: blr + %1 = urem <4 x i16> %x, + ret <4 x i16> 
%1 +} + +; Don't fold if the divisor is 2^16. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_urem_i16_smax: +; CHECK: # %bb.0: +; CHECK-NEXT: blr + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 urem. +define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { +; P9LE-LABEL: dont_fold_urem_i64: +; P9LE: # %bb.0: +; P9LE-NEXT: lis r4, 25644 +; P9LE-NEXT: ori r4, r4, 34192 +; P9LE-NEXT: sldi r4, r4, 32 +; P9LE-NEXT: oris r4, r4, 45590 +; P9LE-NEXT: mfvsrld r3, v3 +; P9LE-NEXT: ori r4, r4, 17097 +; P9LE-NEXT: mulhdu r4, r3, r4 +; P9LE-NEXT: sub r5, r3, r4 +; P9LE-NEXT: rldicl r5, r5, 63, 1 +; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: lis r5, -16037 +; P9LE-NEXT: rldicl r4, r4, 60, 4 +; P9LE-NEXT: ori r5, r5, 28749 +; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: sldi r5, r5, 32 +; P9LE-NEXT: oris r5, r5, 52170 +; P9LE-NEXT: ori r5, r5, 12109 +; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: mfvsrd r4, v3 +; P9LE-NEXT: mulhdu r5, r4, r5 +; P9LE-NEXT: rldicl r5, r5, 52, 12 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, 25653 +; P9LE-NEXT: ori r5, r5, 15432 +; P9LE-NEXT: sldi r5, r5, 32 +; P9LE-NEXT: mtvsrdd v3, r4, r3 +; P9LE-NEXT: mfvsrd r3, v2 +; P9LE-NEXT: rldicl r4, r3, 63, 1 +; P9LE-NEXT: oris r5, r5, 1603 +; P9LE-NEXT: ori r5, r5, 21445 +; P9LE-NEXT: mulhdu r4, r4, r5 +; P9LE-NEXT: rldicl r4, r4, 57, 7 +; P9LE-NEXT: mulli r4, r4, 654 +; P9LE-NEXT: sub r3, r3, r4 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: blr +; +; P9BE-LABEL: dont_fold_urem_i64: +; P9BE: # %bb.0: +; P9BE-NEXT: lis r4, 25644 +; P9BE-NEXT: ori r4, r4, 34192 +; P9BE-NEXT: sldi r4, r4, 32 +; P9BE-NEXT: oris r4, r4, 45590 +; P9BE-NEXT: mfvsrd r3, v3 +; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: mulhdu r4, r3, r4 +; P9BE-NEXT: sub r5, r3, r4 +; P9BE-NEXT: rldicl r5, r5, 63, 1 +; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: lis r5, -16037 +; P9BE-NEXT: rldicl r4, r4, 60, 4 +; P9BE-NEXT: mulli r4, r4, 23 +; 
P9BE-NEXT: ori r5, r5, 28749 +; P9BE-NEXT: sldi r5, r5, 32 +; P9BE-NEXT: oris r5, r5, 52170 +; P9BE-NEXT: ori r5, r5, 12109 +; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: mfvsrld r4, v3 +; P9BE-NEXT: mulhdu r5, r4, r5 +; P9BE-NEXT: rldicl r5, r5, 52, 12 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: lis r5, 25653 +; P9BE-NEXT: ori r5, r5, 15432 +; P9BE-NEXT: sldi r5, r5, 32 +; P9BE-NEXT: mtvsrdd v3, r3, r4 +; P9BE-NEXT: mfvsrld r3, v2 +; P9BE-NEXT: rldicl r4, r3, 63, 1 +; P9BE-NEXT: oris r5, r5, 1603 +; P9BE-NEXT: ori r5, r5, 21445 +; P9BE-NEXT: mulhdu r4, r4, r5 +; P9BE-NEXT: rldicl r4, r4, 57, 7 +; P9BE-NEXT: mulli r4, r4, 654 +; P9BE-NEXT: sub r3, r3, r4 +; P9BE-NEXT: mtvsrdd v2, 0, r3 +; P9BE-NEXT: blr +; +; P8LE-LABEL: dont_fold_urem_i64: +; P8LE: # %bb.0: +; P8LE-NEXT: lis r3, 25644 +; P8LE-NEXT: xxswapd vs0, v3 +; P8LE-NEXT: lis r4, -16037 +; P8LE-NEXT: lis r5, 25653 +; P8LE-NEXT: mfvsrd r6, v2 +; P8LE-NEXT: ori r3, r3, 34192 +; P8LE-NEXT: ori r4, r4, 28749 +; P8LE-NEXT: ori r5, r5, 15432 +; P8LE-NEXT: mfvsrd r8, v3 +; P8LE-NEXT: sldi r3, r3, 32 +; P8LE-NEXT: sldi r4, r4, 32 +; P8LE-NEXT: oris r3, r3, 45590 +; P8LE-NEXT: mfvsrd r7, f0 +; P8LE-NEXT: sldi r5, r5, 32 +; P8LE-NEXT: oris r4, r4, 52170 +; P8LE-NEXT: ori r3, r3, 17097 +; P8LE-NEXT: oris r5, r5, 1603 +; P8LE-NEXT: ori r4, r4, 12109 +; P8LE-NEXT: mulhdu r3, r7, r3 +; P8LE-NEXT: rldicl r9, r6, 63, 1 +; P8LE-NEXT: ori r5, r5, 21445 +; P8LE-NEXT: mulhdu r4, r8, r4 +; P8LE-NEXT: mulhdu r5, r9, r5 +; P8LE-NEXT: sub r9, r7, r3 +; P8LE-NEXT: rldicl r9, r9, 63, 1 +; P8LE-NEXT: rldicl r4, r4, 52, 12 +; P8LE-NEXT: add r3, r9, r3 +; P8LE-NEXT: rldicl r5, r5, 57, 7 +; P8LE-NEXT: mulli r4, r4, 5423 +; P8LE-NEXT: rldicl r3, r3, 60, 4 +; P8LE-NEXT: mulli r5, r5, 654 +; P8LE-NEXT: mulli r3, r3, 23 +; P8LE-NEXT: sub r4, r8, r4 +; P8LE-NEXT: sub r5, r6, r5 +; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: sub r3, r7, r3 +; P8LE-NEXT: li r4, 0 +; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: mtvsrd f2, r3 +; 
P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: xxmrghd v3, vs0, vs2 +; P8LE-NEXT: xxmrghd v2, vs1, vs3 +; P8LE-NEXT: blr +; +; P8BE-LABEL: dont_fold_urem_i64: +; P8BE: # %bb.0: +; P8BE-NEXT: lis r3, 25644 +; P8BE-NEXT: lis r4, -16037 +; P8BE-NEXT: xxswapd vs0, v3 +; P8BE-NEXT: xxswapd vs1, v2 +; P8BE-NEXT: lis r5, 25653 +; P8BE-NEXT: ori r3, r3, 34192 +; P8BE-NEXT: ori r4, r4, 28749 +; P8BE-NEXT: mfvsrd r6, v3 +; P8BE-NEXT: ori r5, r5, 15432 +; P8BE-NEXT: sldi r3, r3, 32 +; P8BE-NEXT: sldi r4, r4, 32 +; P8BE-NEXT: oris r3, r3, 45590 +; P8BE-NEXT: sldi r5, r5, 32 +; P8BE-NEXT: mfvsrd r7, f0 +; P8BE-NEXT: oris r4, r4, 52170 +; P8BE-NEXT: ori r3, r3, 17097 +; P8BE-NEXT: mfvsrd r8, f1 +; P8BE-NEXT: oris r5, r5, 1603 +; P8BE-NEXT: ori r4, r4, 12109 +; P8BE-NEXT: mulhdu r3, r6, r3 +; P8BE-NEXT: ori r5, r5, 21445 +; P8BE-NEXT: mulhdu r4, r7, r4 +; P8BE-NEXT: rldicl r9, r8, 63, 1 +; P8BE-NEXT: mulhdu r5, r9, r5 +; P8BE-NEXT: sub r9, r6, r3 +; P8BE-NEXT: rldicl r9, r9, 63, 1 +; P8BE-NEXT: rldicl r4, r4, 52, 12 +; P8BE-NEXT: add r3, r9, r3 +; P8BE-NEXT: mulli r4, r4, 5423 +; P8BE-NEXT: rldicl r5, r5, 57, 7 +; P8BE-NEXT: rldicl r3, r3, 60, 4 +; P8BE-NEXT: mulli r5, r5, 654 +; P8BE-NEXT: mulli r3, r3, 23 +; P8BE-NEXT: sub r4, r7, r4 +; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: sub r4, r8, r5 +; P8BE-NEXT: sub r3, r6, r3 +; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: li r4, 0 +; P8BE-NEXT: mtvsrd f2, r3 +; P8BE-NEXT: mtvsrd f3, r4 +; P8BE-NEXT: xxmrghd v3, vs2, vs0 +; P8BE-NEXT: xxmrghd v2, vs3, vs1 +; P8BE-NEXT: blr + %1 = urem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll new file mode 100644 index 0000000000000..5211e5291a26c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -0,0 +1,583 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv32 
-mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s + +define i32 @fold_srem_positive_odd(i32 %x) { +; RV32I-LABEL: fold_srem_positive_odd: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_positive_odd: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 706409 +; RV32IM-NEXT: addi a1, a1, 389 +; RV32IM-NEXT: mulh a1, a0, a1 +; RV32IM-NEXT: add a1, a1, a0 +; RV32IM-NEXT: srli a2, a1, 31 +; RV32IM-NEXT: srai a1, a1, 6 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: addi a2, zero, 95 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_positive_odd: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_positive_odd: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: lui a1, 1045903 +; RV64IM-NEXT: addiw a1, a1, -733 +; RV64IM-NEXT: slli a1, a1, 15 +; RV64IM-NEXT: addi a1, a1, 1035 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, -905 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, -1767 
+; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: add a1, a1, a0 +; RV64IM-NEXT: srli a2, a1, 63 +; RV64IM-NEXT: srai a1, a1, 6 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: addi a2, zero, 95 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_srem_positive_even(i32 %x) { +; RV32I-LABEL: fold_srem_positive_even: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a1, zero, 1060 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_positive_even: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 253241 +; RV32IM-NEXT: addi a1, a1, -15 +; RV32IM-NEXT: mulh a1, a0, a1 +; RV32IM-NEXT: srli a2, a1, 31 +; RV32IM-NEXT: srai a1, a1, 8 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: addi a2, zero, 1060 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_positive_even: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: addi a1, zero, 1060 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_positive_even: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: lui a1, 506482 +; RV64IM-NEXT: addiw a1, a1, -31 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, 711 +; RV64IM-NEXT: slli a1, a1, 19 +; RV64IM-NEXT: addi a1, a1, 1979 +; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: srli a2, 
a1, 63 +; RV64IM-NEXT: srai a1, a1, 9 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: addi a2, zero, 1060 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, 1060 + ret i32 %1 +} + + +define i32 @fold_srem_negative_odd(i32 %x) { +; RV32I-LABEL: fold_srem_negative_odd: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a1, zero, -723 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_negative_odd: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 677296 +; RV32IM-NEXT: addi a1, a1, -91 +; RV32IM-NEXT: mulh a1, a0, a1 +; RV32IM-NEXT: srli a2, a1, 31 +; RV32IM-NEXT: srai a1, a1, 8 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: addi a2, zero, -723 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_negative_odd: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: addi a1, zero, -723 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_negative_odd: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: lui a1, 4781 +; RV64IM-NEXT: addiw a1, a1, 2045 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, 1371 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, -11 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, -1355 +; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: sub a1, a1, a0 +; RV64IM-NEXT: 
srli a2, a1, 63 +; RV64IM-NEXT: srai a1, a1, 9 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: addi a2, zero, -723 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, -723 + ret i32 %1 +} + + +define i32 @fold_srem_negative_even(i32 %x) { +; RV32I-LABEL: fold_srem_negative_even: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: lui a1, 1048570 +; RV32I-NEXT: addi a1, a1, 1595 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_negative_even: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 1036895 +; RV32IM-NEXT: addi a1, a1, 999 +; RV32IM-NEXT: mulh a1, a0, a1 +; RV32IM-NEXT: srli a2, a1, 31 +; RV32IM-NEXT: srai a1, a1, 8 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: lui a2, 1048570 +; RV32IM-NEXT: addi a2, a2, 1595 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_negative_even: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: lui a1, 1048570 +; RV64I-NEXT: addiw a1, a1, 1595 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_negative_even: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a0, a0 +; RV64IM-NEXT: lui a1, 1036895 +; RV64IM-NEXT: addiw a1, a1, 999 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, 11 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, -523 +; RV64IM-NEXT: slli a1, a1, 12 +; 
RV64IM-NEXT: addi a1, a1, -481 +; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: srli a2, a1, 63 +; RV64IM-NEXT: srai a1, a1, 12 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: lui a2, 1048570 +; RV64IM-NEXT: addiw a2, a2, 1595 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, -22981 + ret i32 %1 +} + + +; Don't fold if we can combine srem with sdiv. +define i32 @combine_srem_sdiv(i32 %x) { +; RV32I-LABEL: combine_srem_sdiv: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: sw s1, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __divsi3 +; RV32I-NEXT: add a0, s1, a0 +; RV32I-NEXT: lw s1, 4(sp) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: combine_srem_sdiv: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 706409 +; RV32IM-NEXT: addi a1, a1, 389 +; RV32IM-NEXT: mulh a1, a0, a1 +; RV32IM-NEXT: add a1, a1, a0 +; RV32IM-NEXT: srli a2, a1, 31 +; RV32IM-NEXT: srai a1, a1, 6 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: addi a2, zero, 95 +; RV32IM-NEXT: mul a2, a1, a2 +; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: combine_srem_sdiv: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 
+; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: sext.w s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __divdi3 +; RV64I-NEXT: addw a0, s1, a0 +; RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: combine_srem_sdiv: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: lui a2, 1045903 +; RV64IM-NEXT: addiw a2, a2, -733 +; RV64IM-NEXT: slli a2, a2, 15 +; RV64IM-NEXT: addi a2, a2, 1035 +; RV64IM-NEXT: slli a2, a2, 12 +; RV64IM-NEXT: addi a2, a2, -905 +; RV64IM-NEXT: slli a2, a2, 12 +; RV64IM-NEXT: addi a2, a2, -1767 +; RV64IM-NEXT: mulh a2, a1, a2 +; RV64IM-NEXT: add a1, a2, a1 +; RV64IM-NEXT: srli a2, a1, 63 +; RV64IM-NEXT: srai a1, a1, 6 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: addi a2, zero, 95 +; RV64IM-NEXT: mul a2, a1, a2 +; RV64IM-NEXT: sub a0, a0, a2 +; RV64IM-NEXT: addw a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, 95 + %2 = sdiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. 
+define i32 @dont_fold_srem_power_of_two(i32 %x) { +; RV32I-LABEL: dont_fold_srem_power_of_two: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: srli a1, a1, 26 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: andi a1, a1, -64 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_power_of_two: +; RV32IM: # %bb.0: +; RV32IM-NEXT: srai a1, a0, 31 +; RV32IM-NEXT: srli a1, a1, 26 +; RV32IM-NEXT: add a1, a0, a1 +; RV32IM-NEXT: andi a1, a1, -64 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_power_of_two: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: srli a1, a1, 57 +; RV64I-NEXT: andi a1, a1, 63 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: addi a2, a2, -64 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_power_of_two: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: srli a1, a1, 57 +; RV64IM-NEXT: andi a1, a1, 63 +; RV64IM-NEXT: add a1, a0, a1 +; RV64IM-NEXT: addi a2, zero, 1 +; RV64IM-NEXT: slli a2, a2, 32 +; RV64IM-NEXT: addi a2, a2, -64 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: subw a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_srem_one(i32 %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %1 = srem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^31. 
+define i32 @dont_fold_srem_i32_smax(i32 %x) { +; RV32I-LABEL: dont_fold_srem_i32_smax: +; RV32I: # %bb.0: +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_i32_smax: +; RV32IM: # %bb.0: +; RV32IM-NEXT: srai a1, a0, 31 +; RV32IM-NEXT: srli a1, a1, 1 +; RV32IM-NEXT: add a1, a0, a1 +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_i32_smax: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_i32_smax: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: lui a2, 524288 +; RV64IM-NEXT: addiw a2, a2, -1 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: add a1, a0, a1 +; RV64IM-NEXT: addi a2, zero, 1 +; RV64IM-NEXT: slli a2, a2, 31 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: addw a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i32 %x, 2147483648 + ret i32 %1 +} + +; Don't fold i64 srem +define i64 @dont_fold_srem_i64(i64 %x) { +; RV32I-LABEL: dont_fold_srem_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a2, zero, 98 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __moddi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 
+; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -16 +; RV32IM-NEXT: .cfi_def_cfa_offset 16 +; RV32IM-NEXT: sw ra, 12(sp) +; RV32IM-NEXT: .cfi_offset ra, -4 +; RV32IM-NEXT: addi a2, zero, 98 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __moddi3 +; RV32IM-NEXT: lw ra, 12(sp) +; RV32IM-NEXT: .cfi_restore ra +; RV32IM-NEXT: addi sp, sp, 16 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a1, zero, 98 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a1, 2675 +; RV64IM-NEXT: addiw a1, a1, -251 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, 1839 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, 167 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, 1505 +; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: srli a2, a1, 63 +; RV64IM-NEXT: srai a1, a1, 5 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: addi a2, zero, 98 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll new file mode 100644 index 0000000000000..ad7af93fce2f9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -0,0 +1,1689 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv32 
-mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s + +define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { +; RV32I-LABEL: fold_srem_vec_1: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s3, 8(a1) +; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, -124 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 98 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, -1003 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh a0, 6(s1) +; RV32I-NEXT: sh s0, 4(s1) +; RV32I-NEXT: sh s5, 2(s1) +; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: 
.cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_vec_1: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a3, 8(a1) +; RV32IM-NEXT: lh a4, 0(a1) +; RV32IM-NEXT: lh a1, 4(a1) +; RV32IM-NEXT: lui a5, 706409 +; RV32IM-NEXT: addi a5, a5, 389 +; RV32IM-NEXT: mulh a5, a4, a5 +; RV32IM-NEXT: add a5, a5, a4 +; RV32IM-NEXT: srli a2, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: addi a5, zero, 95 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: sub a2, a4, a2 +; RV32IM-NEXT: lui a4, 507375 +; RV32IM-NEXT: addi a4, a4, 1981 +; RV32IM-NEXT: mulh a4, a1, a4 +; RV32IM-NEXT: sub a4, a4, a1 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 6 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: addi a5, zero, -124 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: lui a4, 342392 +; RV32IM-NEXT: addi a4, a4, 669 +; RV32IM-NEXT: mulh a4, a3, a4 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 5 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: addi a5, zero, 98 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: lui a4, 780943 +; RV32IM-NEXT: addi a4, a4, 1809 +; RV32IM-NEXT: mulh a4, a6, a4 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 8 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: addi a5, zero, -1003 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a4, a6, a4 +; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_vec_1: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: .cfi_def_cfa_offset 64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: sd s1, 40(sp) +; RV64I-NEXT: sd s2, 32(sp) +; RV64I-NEXT: sd s3, 24(sp) +; RV64I-NEXT: sd s4, 16(sp) +; 
RV64I-NEXT: sd s5, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s3, 16(a1) +; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, -124 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 98 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, -1003 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh a0, 6(s1) +; RV64I-NEXT: sh s0, 4(s1) +; RV64I-NEXT: sh s5, 2(s1) +; RV64I-NEXT: sh s4, 0(s1) +; RV64I-NEXT: ld s5, 8(sp) +; RV64I-NEXT: ld s4, 16(sp) +; RV64I-NEXT: ld s3, 24(sp) +; RV64I-NEXT: ld s2, 32(sp) +; RV64I-NEXT: ld s1, 40(sp) +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_vec_1: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a3, 16(a1) +; RV64IM-NEXT: lh a4, 8(a1) +; RV64IM-NEXT: lh a1, 0(a1) +; RV64IM-NEXT: lui a5, 1045903 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -905 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -1767 +; RV64IM-NEXT: mulh a5, a1, a5 +; RV64IM-NEXT: add a5, a5, a1 +; RV64IM-NEXT: srli a2, a5, 63 +; 
RV64IM-NEXT: srli a5, a5, 6 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: addi a5, zero, 95 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a1, a1, a2 +; RV64IM-NEXT: lui a2, 248 +; RV64IM-NEXT: addiw a2, a2, -1057 +; RV64IM-NEXT: slli a2, a2, 15 +; RV64IM-NEXT: addi a2, a2, -1057 +; RV64IM-NEXT: slli a2, a2, 15 +; RV64IM-NEXT: addi a2, a2, -1057 +; RV64IM-NEXT: slli a2, a2, 13 +; RV64IM-NEXT: addi a2, a2, -265 +; RV64IM-NEXT: mulh a2, a4, a2 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: srli a5, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: addi a5, zero, -124 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a2, a4, a2 +; RV64IM-NEXT: lui a4, 2675 +; RV64IM-NEXT: addiw a4, a4, -251 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1839 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 167 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1505 +; RV64IM-NEXT: mulh a4, a3, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, a4, 5 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, 98 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 1040212 +; RV64IM-NEXT: addiw a4, a4, 1977 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1907 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -453 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1213 +; RV64IM-NEXT: mulh a4, a6, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, a4, 7 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, -1003 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a4, a6, a4 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a3, 4(a0) +; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { +; RV32I-LABEL: fold_srem_vec_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, 
sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s3, 8(a1) +; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh a0, 6(s1) +; RV32I-NEXT: sh s0, 4(s1) +; RV32I-NEXT: sh s5, 2(s1) +; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_srem_vec_2: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a3, 8(a1) +; RV32IM-NEXT: lh a4, 0(a1) +; RV32IM-NEXT: lh a1, 4(a1) +; RV32IM-NEXT: lui a5, 706409 +; RV32IM-NEXT: addi a5, a5, 389 +; RV32IM-NEXT: mulh a2, a4, a5 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: srli a7, a2, 
31 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: add a2, a2, a7 +; RV32IM-NEXT: addi a7, zero, 95 +; RV32IM-NEXT: mul a2, a2, a7 +; RV32IM-NEXT: sub t0, a4, a2 +; RV32IM-NEXT: mulh a4, a1, a5 +; RV32IM-NEXT: add a4, a4, a1 +; RV32IM-NEXT: srli a2, a4, 31 +; RV32IM-NEXT: srli a4, a4, 6 +; RV32IM-NEXT: add a2, a4, a2 +; RV32IM-NEXT: mul a2, a2, a7 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: mulh a2, a3, a5 +; RV32IM-NEXT: add a2, a2, a3 +; RV32IM-NEXT: srli a4, a2, 31 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: mul a2, a2, a7 +; RV32IM-NEXT: sub a2, a3, a2 +; RV32IM-NEXT: mulh a3, a6, a5 +; RV32IM-NEXT: add a3, a3, a6 +; RV32IM-NEXT: srli a4, a3, 31 +; RV32IM-NEXT: srli a3, a3, 6 +; RV32IM-NEXT: add a3, a3, a4 +; RV32IM-NEXT: mul a3, a3, a7 +; RV32IM-NEXT: sub a3, a6, a3 +; RV32IM-NEXT: sh a3, 6(a0) +; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_srem_vec_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: .cfi_def_cfa_offset 64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: sd s1, 40(sp) +; RV64I-NEXT: sd s2, 32(sp) +; RV64I-NEXT: sd s3, 24(sp) +; RV64I-NEXT: sd s4, 16(sp) +; RV64I-NEXT: sd s5, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s3, 16(a1) +; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: 
call __moddi3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh a0, 6(s1) +; RV64I-NEXT: sh s0, 4(s1) +; RV64I-NEXT: sh s5, 2(s1) +; RV64I-NEXT: sh s4, 0(s1) +; RV64I-NEXT: ld s5, 8(sp) +; RV64I-NEXT: ld s4, 16(sp) +; RV64I-NEXT: ld s3, 24(sp) +; RV64I-NEXT: ld s2, 32(sp) +; RV64I-NEXT: ld s1, 40(sp) +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_srem_vec_2: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a7, 16(a1) +; RV64IM-NEXT: lh a4, 8(a1) +; RV64IM-NEXT: lh a1, 0(a1) +; RV64IM-NEXT: lui a5, 1045903 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -905 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -1767 +; RV64IM-NEXT: mulh a2, a1, a5 +; RV64IM-NEXT: add a2, a2, a1 +; RV64IM-NEXT: srli a3, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a3 +; RV64IM-NEXT: addi a3, zero, 95 +; RV64IM-NEXT: mul a2, a2, a3 +; RV64IM-NEXT: sub t0, a1, a2 +; RV64IM-NEXT: mulh a2, a4, a5 +; RV64IM-NEXT: add a2, a2, a4 +; RV64IM-NEXT: srli a1, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a1, a2, a1 +; RV64IM-NEXT: mul a1, a1, a3 +; RV64IM-NEXT: sub a1, a4, a1 +; RV64IM-NEXT: mulh a2, a7, a5 +; RV64IM-NEXT: add a2, a2, a7 +; RV64IM-NEXT: srli a4, a2, 63 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: add a2, a2, a4 +; RV64IM-NEXT: mul a2, a2, a3 +; RV64IM-NEXT: sub a2, a7, a2 +; RV64IM-NEXT: mulh a4, a6, a5 +; RV64IM-NEXT: add a4, a4, a6 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, a4, 
6 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: mul a3, a4, a3 +; RV64IM-NEXT: sub a3, a6, a3 +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine srem with sdiv. +define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { +; RV32I-LABEL: combine_srem_sdiv: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: sw s1, 36(sp) +; RV32I-NEXT: sw s2, 32(sp) +; RV32I-NEXT: sw s3, 28(sp) +; RV32I-NEXT: sw s4, 24(sp) +; RV32I-NEXT: sw s5, 20(sp) +; RV32I-NEXT: sw s6, 16(sp) +; RV32I-NEXT: sw s7, 12(sp) +; RV32I-NEXT: sw s8, 8(sp) +; RV32I-NEXT: sw s9, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: .cfi_offset s9, -44 +; RV32I-NEXT: lh s2, 0(a1) +; RV32I-NEXT: lh s3, 4(a1) +; RV32I-NEXT: lh s4, 8(a1) +; RV32I-NEXT: lh s1, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s8, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __divsi3 +; RV32I-NEXT: mv s9, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, 
s4 +; RV32I-NEXT: call __divsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __divsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __divsi3 +; RV32I-NEXT: add a0, s8, a0 +; RV32I-NEXT: add a1, s7, s1 +; RV32I-NEXT: add a2, s6, s4 +; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: sh a3, 6(s0) +; RV32I-NEXT: sh a2, 4(s0) +; RV32I-NEXT: sh a1, 2(s0) +; RV32I-NEXT: sh a0, 0(s0) +; RV32I-NEXT: lw s9, 4(sp) +; RV32I-NEXT: lw s8, 8(sp) +; RV32I-NEXT: lw s7, 12(sp) +; RV32I-NEXT: lw s6, 16(sp) +; RV32I-NEXT: lw s5, 20(sp) +; RV32I-NEXT: lw s4, 24(sp) +; RV32I-NEXT: lw s3, 28(sp) +; RV32I-NEXT: lw s2, 32(sp) +; RV32I-NEXT: lw s1, 36(sp) +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: .cfi_restore s6 +; RV32I-NEXT: .cfi_restore s7 +; RV32I-NEXT: .cfi_restore s8 +; RV32I-NEXT: .cfi_restore s9 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: combine_srem_sdiv: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lh a6, 0(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a4, 12(a1) +; RV32IM-NEXT: lh a1, 8(a1) +; RV32IM-NEXT: lui a5, 706409 +; RV32IM-NEXT: addi a5, a5, 389 +; RV32IM-NEXT: mulh a2, a4, a5 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: srli a7, a2, 31 +; RV32IM-NEXT: srai a2, a2, 6 +; RV32IM-NEXT: add t0, a2, a7 +; RV32IM-NEXT: addi a7, zero, 95 +; RV32IM-NEXT: mul a2, t0, a7 +; RV32IM-NEXT: sub t1, a4, a2 +; RV32IM-NEXT: mulh a4, a1, a5 +; RV32IM-NEXT: add a4, a4, a1 +; RV32IM-NEXT: srli a2, a4, 31 +; RV32IM-NEXT: srai a4, a4, 6 +; RV32IM-NEXT: add a2, a4, a2 +; RV32IM-NEXT: mul a4, a2, a7 +; RV32IM-NEXT: sub t2, a1, a4 +; RV32IM-NEXT: mulh a4, a3, a5 +; RV32IM-NEXT: add 
a4, a4, a3 +; RV32IM-NEXT: srli a1, a4, 31 +; RV32IM-NEXT: srai a4, a4, 6 +; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: mul a4, a1, a7 +; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: mulh a4, a6, a5 +; RV32IM-NEXT: add a4, a4, a6 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srai a4, a4, 6 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: mul a5, a4, a7 +; RV32IM-NEXT: sub a5, a6, a5 +; RV32IM-NEXT: add a4, a5, a4 +; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: add a2, t2, a2 +; RV32IM-NEXT: add a3, t1, t0 +; RV32IM-NEXT: sh a3, 6(a0) +; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: combine_srem_sdiv: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -96 +; RV64I-NEXT: .cfi_def_cfa_offset 96 +; RV64I-NEXT: sd ra, 88(sp) +; RV64I-NEXT: sd s0, 80(sp) +; RV64I-NEXT: sd s1, 72(sp) +; RV64I-NEXT: sd s2, 64(sp) +; RV64I-NEXT: sd s3, 56(sp) +; RV64I-NEXT: sd s4, 48(sp) +; RV64I-NEXT: sd s5, 40(sp) +; RV64I-NEXT: sd s6, 32(sp) +; RV64I-NEXT: sd s7, 24(sp) +; RV64I-NEXT: sd s8, 16(sp) +; RV64I-NEXT: sd s9, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: .cfi_offset s6, -64 +; RV64I-NEXT: .cfi_offset s7, -72 +; RV64I-NEXT: .cfi_offset s8, -80 +; RV64I-NEXT: .cfi_offset s9, -88 +; RV64I-NEXT: lh s2, 0(a1) +; RV64I-NEXT: lh s3, 8(a1) +; RV64I-NEXT: lh s4, 16(a1) +; RV64I-NEXT: lh s1, 24(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s6, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s7, a0 +; 
RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s8, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __divdi3 +; RV64I-NEXT: mv s9, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: call __divdi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __divdi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __divdi3 +; RV64I-NEXT: add a0, s8, a0 +; RV64I-NEXT: add a1, s7, s1 +; RV64I-NEXT: add a2, s6, s4 +; RV64I-NEXT: add a3, s5, s9 +; RV64I-NEXT: sh a3, 6(s0) +; RV64I-NEXT: sh a2, 4(s0) +; RV64I-NEXT: sh a1, 2(s0) +; RV64I-NEXT: sh a0, 0(s0) +; RV64I-NEXT: ld s9, 8(sp) +; RV64I-NEXT: ld s8, 16(sp) +; RV64I-NEXT: ld s7, 24(sp) +; RV64I-NEXT: ld s6, 32(sp) +; RV64I-NEXT: ld s5, 40(sp) +; RV64I-NEXT: ld s4, 48(sp) +; RV64I-NEXT: ld s3, 56(sp) +; RV64I-NEXT: ld s2, 64(sp) +; RV64I-NEXT: ld s1, 72(sp) +; RV64I-NEXT: ld s0, 80(sp) +; RV64I-NEXT: ld ra, 88(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: .cfi_restore s6 +; RV64I-NEXT: .cfi_restore s7 +; RV64I-NEXT: .cfi_restore s8 +; RV64I-NEXT: .cfi_restore s9 +; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: combine_srem_sdiv: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a6, 0(a1) +; RV64IM-NEXT: lh a7, 8(a1) +; RV64IM-NEXT: lh a4, 16(a1) +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: lui a5, 1045903 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -905 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -1767 +; RV64IM-NEXT: mulh a2, a1, a5 +; 
RV64IM-NEXT: add a2, a2, a1 +; RV64IM-NEXT: srli a3, a2, 63 +; RV64IM-NEXT: srai a2, a2, 6 +; RV64IM-NEXT: add t3, a2, a3 +; RV64IM-NEXT: addi t0, zero, 95 +; RV64IM-NEXT: mul a3, t3, t0 +; RV64IM-NEXT: sub t1, a1, a3 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli a1, a3, 63 +; RV64IM-NEXT: srai a3, a3, 6 +; RV64IM-NEXT: add a1, a3, a1 +; RV64IM-NEXT: mul a3, a1, t0 +; RV64IM-NEXT: sub t2, a4, a3 +; RV64IM-NEXT: mulh a4, a7, a5 +; RV64IM-NEXT: add a4, a4, a7 +; RV64IM-NEXT: srli a3, a4, 63 +; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: mul a4, a3, t0 +; RV64IM-NEXT: sub a4, a7, a4 +; RV64IM-NEXT: mulh a5, a6, a5 +; RV64IM-NEXT: add a5, a5, a6 +; RV64IM-NEXT: srli a2, a5, 63 +; RV64IM-NEXT: srai a5, a5, 6 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: mul a5, a2, t0 +; RV64IM-NEXT: sub a5, a6, a5 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: add a1, t2, a1 +; RV64IM-NEXT: add a4, t1, t3 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + %2 = sdiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { +; RV32I-LABEL: dont_fold_srem_power_of_two: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lh a2, 0(a1) +; RV32I-NEXT: lh a0, 12(a1) +; RV32I-NEXT: lh a3, 8(a1) +; RV32I-NEXT: lh a1, 4(a1) +; RV32I-NEXT: srai a4, a2, 31 +; RV32I-NEXT: srli a4, a4, 26 +; RV32I-NEXT: add a4, a2, a4 +; RV32I-NEXT: lui a6, 16 +; RV32I-NEXT: addi a5, a6, -64 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: sub s2, a2, a4 +; RV32I-NEXT: srai a2, a1, 31 +; RV32I-NEXT: srli a2, a2, 27 +; RV32I-NEXT: add a2, a1, a2 +; RV32I-NEXT: addi a4, a6, -32 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: sub s3, a1, a2 +; RV32I-NEXT: srai a1, a3, 31 +; RV32I-NEXT: srli a1, a1, 29 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: addi a2, a6, -8 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: sub s1, a3, a1 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh a0, 6(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh s2, 0(s0) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_power_of_two: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lh a6, 8(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a4, 12(a1) +; RV32IM-NEXT: lh a1, 0(a1) +; RV32IM-NEXT: lui 
a5, 706409 +; RV32IM-NEXT: addi a5, a5, 389 +; RV32IM-NEXT: mulh a5, a4, a5 +; RV32IM-NEXT: add a5, a5, a4 +; RV32IM-NEXT: srli a2, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: addi a5, zero, 95 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: sub a7, a4, a2 +; RV32IM-NEXT: srai a4, a1, 31 +; RV32IM-NEXT: srli a4, a4, 26 +; RV32IM-NEXT: add a4, a1, a4 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: addi a2, a5, -64 +; RV32IM-NEXT: and a2, a4, a2 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: srai a2, a3, 31 +; RV32IM-NEXT: srli a2, a2, 27 +; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: addi a4, a5, -32 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: sub a2, a3, a2 +; RV32IM-NEXT: srai a3, a6, 31 +; RV32IM-NEXT: srli a3, a3, 29 +; RV32IM-NEXT: add a3, a6, a3 +; RV32IM-NEXT: addi a4, a5, -8 +; RV32IM-NEXT: and a3, a3, a4 +; RV32IM-NEXT: sub a3, a6, a3 +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: sh a1, 0(a0) +; RV32IM-NEXT: sh a7, 6(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_power_of_two: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lh a2, 0(a1) +; RV64I-NEXT: lh a0, 24(a1) +; RV64I-NEXT: lh a3, 16(a1) +; RV64I-NEXT: lh a1, 8(a1) +; RV64I-NEXT: srai a4, a2, 63 +; RV64I-NEXT: srli a4, a4, 58 +; RV64I-NEXT: add a4, a2, a4 +; RV64I-NEXT: lui a6, 16 +; RV64I-NEXT: addiw a5, a6, -64 +; RV64I-NEXT: and a4, a4, a5 +; RV64I-NEXT: sub s2, a2, a4 +; RV64I-NEXT: srai a2, a1, 63 +; RV64I-NEXT: srli a2, a2, 59 +; RV64I-NEXT: add a2, a1, a2 +; RV64I-NEXT: addiw a4, a6, -32 +; 
RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: sub s3, a1, a2 +; RV64I-NEXT: srai a1, a3, 63 +; RV64I-NEXT: srli a1, a1, 61 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: addiw a2, a6, -8 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub s1, a3, a1 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh a0, 6(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh s2, 0(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_power_of_two: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a6, 16(a1) +; RV64IM-NEXT: lh a3, 8(a1) +; RV64IM-NEXT: lh a4, 0(a1) +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: lui a5, 1045903 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -905 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -1767 +; RV64IM-NEXT: mulh a5, a1, a5 +; RV64IM-NEXT: add a5, a5, a1 +; RV64IM-NEXT: srli a2, a5, 63 +; RV64IM-NEXT: srli a5, a5, 6 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: addi a5, zero, 95 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a7, a1, a2 +; RV64IM-NEXT: srai a2, a4, 63 +; RV64IM-NEXT: srli a2, a2, 58 +; RV64IM-NEXT: add a2, a4, a2 +; RV64IM-NEXT: lui a5, 16 +; RV64IM-NEXT: addiw a1, a5, -64 +; RV64IM-NEXT: and a1, a2, a1 +; RV64IM-NEXT: sub a1, a4, a1 +; RV64IM-NEXT: srai a2, a3, 63 +; RV64IM-NEXT: srli a2, a2, 59 +; RV64IM-NEXT: add a2, a3, a2 +; RV64IM-NEXT: addiw a4, a5, -32 +; RV64IM-NEXT: and a2, a2, a4 +; RV64IM-NEXT: sub a2, a3, a2 +; RV64IM-NEXT: srai a3, a6, 63 +; RV64IM-NEXT: srli a3, a3, 61 +; RV64IM-NEXT: add 
a3, a6, a3 +; RV64IM-NEXT: addiw a4, a5, -8 +; RV64IM-NEXT: and a3, a3, a4 +; RV64IM-NEXT: sub a3, a6, a3 +; RV64IM-NEXT: sh a3, 4(a0) +; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: sh a7, 6(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. +define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { +; RV32I-LABEL: dont_fold_srem_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh a2, 4(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 654 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: addi a1, zero, 23 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a1, a0, 1327 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_one: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lh a2, 12(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a1, 8(a1) +; RV32IM-NEXT: lui a4, 820904 +; 
RV32IM-NEXT: addi a4, a4, -1903 +; RV32IM-NEXT: mulh a4, a3, a4 +; RV32IM-NEXT: add a4, a4, a3 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 9 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: addi a5, zero, 654 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: lui a4, 729444 +; RV32IM-NEXT: addi a4, a4, 713 +; RV32IM-NEXT: mulh a4, a1, a4 +; RV32IM-NEXT: add a4, a4, a1 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 4 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: addi a5, zero, 23 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: lui a4, 395996 +; RV32IM-NEXT: addi a4, a4, -2009 +; RV32IM-NEXT: mulh a4, a2, a4 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 11 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: addi a5, a5, 1327 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: sh zero, 0(a0) +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh a2, 8(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 654 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: addi a1, zero, 23 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a1, a0, 1327 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; 
RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_one: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 24(a1) +; RV64IM-NEXT: lh a3, 8(a1) +; RV64IM-NEXT: lh a1, 16(a1) +; RV64IM-NEXT: lui a4, 1043590 +; RV64IM-NEXT: addiw a4, a4, -1781 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1069 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1959 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 357 +; RV64IM-NEXT: mulh a4, a1, a4 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, 23 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a1, a1, a4 +; RV64IM-NEXT: lui a4, 6413 +; RV64IM-NEXT: addiw a4, a4, 1265 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1027 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1077 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 965 +; RV64IM-NEXT: mulh a4, a3, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, a4, 8 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, 654 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 12375 +; RV64IM-NEXT: addiw a4, a4, -575 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 883 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, -431 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 1959 +; RV64IM-NEXT: mulh a4, a2, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srli a4, 
a4, 11 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: sh zero, 0(a0) +; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^15. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; RV32I-LABEL: dont_fold_urem_i16_smax: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: lh a2, 4(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh a0, 8(a1) +; RV32I-NEXT: slli a1, a2, 16 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: srli a1, a1, 17 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: lui a3, 8 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: sub s3, a2, a1 +; RV32I-NEXT: addi a1, zero, 23 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a1, a0, 1327 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_urem_i16_smax: +; 
RV32IM: # %bb.0: +; RV32IM-NEXT: lh a2, 4(a1) +; RV32IM-NEXT: slli a6, a2, 16 +; RV32IM-NEXT: lh a4, 8(a1) +; RV32IM-NEXT: lh a1, 12(a1) +; RV32IM-NEXT: lui a5, 729444 +; RV32IM-NEXT: addi a5, a5, 713 +; RV32IM-NEXT: mulh a5, a4, a5 +; RV32IM-NEXT: add a5, a5, a4 +; RV32IM-NEXT: srli a3, a5, 31 +; RV32IM-NEXT: srli a5, a5, 4 +; RV32IM-NEXT: add a3, a5, a3 +; RV32IM-NEXT: addi a5, zero, 23 +; RV32IM-NEXT: mul a3, a3, a5 +; RV32IM-NEXT: sub a3, a4, a3 +; RV32IM-NEXT: lui a4, 395996 +; RV32IM-NEXT: addi a4, a4, -2009 +; RV32IM-NEXT: mulh a4, a1, a4 +; RV32IM-NEXT: srli a5, a4, 31 +; RV32IM-NEXT: srli a4, a4, 11 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: addi a5, a5, 1327 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: srai a4, a6, 31 +; RV32IM-NEXT: srli a4, a4, 17 +; RV32IM-NEXT: add a4, a2, a4 +; RV32IM-NEXT: lui a5, 8 +; RV32IM-NEXT: and a4, a4, a5 +; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: sh zero, 0(a0) +; RV32IM-NEXT: sh a1, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_urem_i16_smax: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: lh a2, 8(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh a0, 16(a1) +; RV64I-NEXT: slli a1, a2, 48 +; RV64I-NEXT: srai a1, a1, 63 +; RV64I-NEXT: srli a1, a1, 49 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lui a3, 8 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: sub s3, a2, a1 +; RV64I-NEXT: addi a1, zero, 23 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: 
lui a0, 1 +; RV64I-NEXT: addiw a1, a0, 1327 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_urem_i16_smax: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lh a2, 8(a1) +; RV64IM-NEXT: slli a6, a2, 48 +; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: lh a1, 16(a1) +; RV64IM-NEXT: lui a5, 1043590 +; RV64IM-NEXT: addiw a5, a5, -1781 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1069 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, -1959 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 357 +; RV64IM-NEXT: mulh a5, a1, a5 +; RV64IM-NEXT: add a5, a5, a1 +; RV64IM-NEXT: srli a3, a5, 63 +; RV64IM-NEXT: srli a5, a5, 4 +; RV64IM-NEXT: add a3, a5, a3 +; RV64IM-NEXT: addi a5, zero, 23 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a1, a1, a3 +; RV64IM-NEXT: lui a3, 12375 +; RV64IM-NEXT: addiw a3, a3, -575 +; RV64IM-NEXT: slli a3, a3, 12 +; RV64IM-NEXT: addi a3, a3, 883 +; RV64IM-NEXT: slli a3, a3, 13 +; RV64IM-NEXT: addi a3, a3, -431 +; RV64IM-NEXT: slli a3, a3, 12 +; RV64IM-NEXT: addi a3, a3, 1959 +; RV64IM-NEXT: mulh a3, a4, a3 +; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: srli a3, a3, 11 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a3, a4, a3 +; RV64IM-NEXT: srai a4, a6, 63 +; RV64IM-NEXT: srli a4, a4, 49 +; RV64IM-NEXT: add a4, a2, a4 +; RV64IM-NEXT: lui a5, 8 +; RV64IM-NEXT: and a4, a4, a5 +; RV64IM-NEXT: sub a2, a2, a4 
+; RV64IM-NEXT: sh zero, 0(a0) +; RV64IM-NEXT: sh a2, 2(a0) +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 srem. +define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { +; RV32I-LABEL: dont_fold_srem_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: sw s1, 36(sp) +; RV32I-NEXT: sw s2, 32(sp) +; RV32I-NEXT: sw s3, 28(sp) +; RV32I-NEXT: sw s4, 24(sp) +; RV32I-NEXT: sw s5, 20(sp) +; RV32I-NEXT: sw s6, 16(sp) +; RV32I-NEXT: sw s7, 12(sp) +; RV32I-NEXT: sw s8, 8(sp) +; RV32I-NEXT: sw s9, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: .cfi_offset s9, -44 +; RV32I-NEXT: lw s2, 24(a1) +; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw s4, 16(a1) +; RV32I-NEXT: lw s5, 20(a1) +; RV32I-NEXT: lw s6, 8(a1) +; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a1, 4(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a2, zero, 1 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __moddi3 +; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: mv s8, a1 +; RV32I-NEXT: addi a2, zero, 654 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __moddi3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s9, a1 +; RV32I-NEXT: addi a2, zero, 23 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __moddi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: 
mv a0, s2 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __moddi3 +; RV32I-NEXT: sw a1, 28(s0) +; RV32I-NEXT: sw a0, 24(s0) +; RV32I-NEXT: sw s1, 20(s0) +; RV32I-NEXT: sw s4, 16(s0) +; RV32I-NEXT: sw s9, 12(s0) +; RV32I-NEXT: sw s6, 8(s0) +; RV32I-NEXT: sw s8, 4(s0) +; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: lw s9, 4(sp) +; RV32I-NEXT: lw s8, 8(sp) +; RV32I-NEXT: lw s7, 12(sp) +; RV32I-NEXT: lw s6, 16(sp) +; RV32I-NEXT: lw s5, 20(sp) +; RV32I-NEXT: lw s4, 24(sp) +; RV32I-NEXT: lw s3, 28(sp) +; RV32I-NEXT: lw s2, 32(sp) +; RV32I-NEXT: lw s1, 36(sp) +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: .cfi_restore s6 +; RV32I-NEXT: .cfi_restore s7 +; RV32I-NEXT: .cfi_restore s8 +; RV32I-NEXT: .cfi_restore s9 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_srem_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: .cfi_def_cfa_offset 48 +; RV32IM-NEXT: sw ra, 44(sp) +; RV32IM-NEXT: sw s0, 40(sp) +; RV32IM-NEXT: sw s1, 36(sp) +; RV32IM-NEXT: sw s2, 32(sp) +; RV32IM-NEXT: sw s3, 28(sp) +; RV32IM-NEXT: sw s4, 24(sp) +; RV32IM-NEXT: sw s5, 20(sp) +; RV32IM-NEXT: sw s6, 16(sp) +; RV32IM-NEXT: sw s7, 12(sp) +; RV32IM-NEXT: sw s8, 8(sp) +; RV32IM-NEXT: sw s9, 4(sp) +; RV32IM-NEXT: .cfi_offset ra, -4 +; RV32IM-NEXT: .cfi_offset s0, -8 +; RV32IM-NEXT: .cfi_offset s1, -12 +; RV32IM-NEXT: .cfi_offset s2, -16 +; RV32IM-NEXT: .cfi_offset s3, -20 +; RV32IM-NEXT: .cfi_offset s4, -24 +; RV32IM-NEXT: .cfi_offset s5, -28 +; RV32IM-NEXT: .cfi_offset s6, -32 +; RV32IM-NEXT: .cfi_offset s7, -36 +; RV32IM-NEXT: .cfi_offset s8, -40 +; RV32IM-NEXT: .cfi_offset s9, -44 +; RV32IM-NEXT: lw s2, 24(a1) +; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw 
s4, 16(a1) +; RV32IM-NEXT: lw s5, 20(a1) +; RV32IM-NEXT: lw s6, 8(a1) +; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw a3, 0(a1) +; RV32IM-NEXT: lw a1, 4(a1) +; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: addi a2, zero, 1 +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __moddi3 +; RV32IM-NEXT: mv s7, a0 +; RV32IM-NEXT: mv s8, a1 +; RV32IM-NEXT: addi a2, zero, 654 +; RV32IM-NEXT: mv a0, s6 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __moddi3 +; RV32IM-NEXT: mv s6, a0 +; RV32IM-NEXT: mv s9, a1 +; RV32IM-NEXT: addi a2, zero, 23 +; RV32IM-NEXT: mv a0, s4 +; RV32IM-NEXT: mv a1, s5 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __moddi3 +; RV32IM-NEXT: mv s4, a0 +; RV32IM-NEXT: mv s1, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s2 +; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __moddi3 +; RV32IM-NEXT: sw a1, 28(s0) +; RV32IM-NEXT: sw a0, 24(s0) +; RV32IM-NEXT: sw s1, 20(s0) +; RV32IM-NEXT: sw s4, 16(s0) +; RV32IM-NEXT: sw s9, 12(s0) +; RV32IM-NEXT: sw s6, 8(s0) +; RV32IM-NEXT: sw s8, 4(s0) +; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: lw s9, 4(sp) +; RV32IM-NEXT: lw s8, 8(sp) +; RV32IM-NEXT: lw s7, 12(sp) +; RV32IM-NEXT: lw s6, 16(sp) +; RV32IM-NEXT: lw s5, 20(sp) +; RV32IM-NEXT: lw s4, 24(sp) +; RV32IM-NEXT: lw s3, 28(sp) +; RV32IM-NEXT: lw s2, 32(sp) +; RV32IM-NEXT: lw s1, 36(sp) +; RV32IM-NEXT: lw s0, 40(sp) +; RV32IM-NEXT: lw ra, 44(sp) +; RV32IM-NEXT: .cfi_restore ra +; RV32IM-NEXT: .cfi_restore s0 +; RV32IM-NEXT: .cfi_restore s1 +; RV32IM-NEXT: .cfi_restore s2 +; RV32IM-NEXT: .cfi_restore s3 +; RV32IM-NEXT: .cfi_restore s4 +; RV32IM-NEXT: .cfi_restore s5 +; RV32IM-NEXT: .cfi_restore s6 +; RV32IM-NEXT: .cfi_restore s7 +; RV32IM-NEXT: .cfi_restore s8 +; RV32IM-NEXT: .cfi_restore s9 +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_srem_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: 
addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld s1, 16(a1) +; RV64I-NEXT: ld a2, 8(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 654 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: addi a1, zero, 23 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a1, a0, 1327 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s0) +; RV64I-NEXT: sd s1, 16(s0) +; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_srem_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: ld a2, 24(a1) +; RV64IM-NEXT: ld a3, 8(a1) +; RV64IM-NEXT: ld a1, 16(a1) +; RV64IM-NEXT: lui a4, 1043590 +; RV64IM-NEXT: addiw a4, a4, -1781 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1069 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1959 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 357 +; RV64IM-NEXT: mulh a4, a1, a4 +; RV64IM-NEXT: add a4, a4, a1 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srai a4, a4, 4 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, 23 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a1, a1, a4 +; 
RV64IM-NEXT: lui a4, 6413 +; RV64IM-NEXT: addiw a4, a4, 1265 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1027 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1077 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 965 +; RV64IM-NEXT: mulh a4, a3, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srai a4, a4, 8 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: addi a5, zero, 654 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 12375 +; RV64IM-NEXT: addiw a4, a4, -575 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 883 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, -431 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 1959 +; RV64IM-NEXT: mulh a4, a2, a4 +; RV64IM-NEXT: srli a5, a4, 63 +; RV64IM-NEXT: srai a4, a4, 11 +; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: sd zero, 0(a0) +; RV64IM-NEXT: sd a2, 24(a0) +; RV64IM-NEXT: sd a3, 8(a0) +; RV64IM-NEXT: sd a1, 16(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = srem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll new file mode 100644 index 0000000000000..374ce07b2ac2d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -0,0 +1,354 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s + +define i32 
@fold_urem_positive_odd(i32 %x) { +; RV32I-LABEL: fold_urem_positive_odd: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_urem_positive_odd: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 364242 +; RV32IM-NEXT: addi a1, a1, 777 +; RV32IM-NEXT: mulhu a1, a0, a1 +; RV32IM-NEXT: sub a2, a0, a1 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: srli a1, a1, 6 +; RV32IM-NEXT: addi a2, zero, 95 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_urem_positive_odd: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_urem_positive_odd: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 32 +; RV64IM-NEXT: srli a0, a0, 32 +; RV64IM-NEXT: lui a1, 1423 +; RV64IM-NEXT: addiw a1, a1, -733 +; RV64IM-NEXT: slli a1, a1, 15 +; RV64IM-NEXT: addi a1, a1, 1035 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, -1811 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, 561 +; RV64IM-NEXT: mulhu a1, a0, a1 +; RV64IM-NEXT: sub a2, a0, a1 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a1, a2, a1 +; RV64IM-NEXT: srli a1, a1, 6 +; RV64IM-NEXT: addi a2, zero, 95 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: 
.cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_urem_positive_even(i32 %x) { +; RV32I-LABEL: fold_urem_positive_even: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a1, zero, 1060 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_urem_positive_even: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 1012964 +; RV32IM-NEXT: addi a1, a1, -61 +; RV32IM-NEXT: mulhu a1, a0, a1 +; RV32IM-NEXT: srli a1, a1, 10 +; RV32IM-NEXT: addi a2, zero, 1060 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: sub a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_urem_positive_even: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a1, zero, 1060 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_urem_positive_even: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 32 +; RV64IM-NEXT: srli a0, a0, 32 +; RV64IM-NEXT: lui a1, 1048020 +; RV64IM-NEXT: addiw a1, a1, -1793 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, 139 +; RV64IM-NEXT: slli a1, a1, 14 +; RV64IM-NEXT: addi a1, a1, 1793 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, -139 +; RV64IM-NEXT: mulhu a1, a0, a1 +; RV64IM-NEXT: srli a1, a1, 10 +; RV64IM-NEXT: addi a2, zero, 1060 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem i32 %x, 1060 + 
ret i32 %1 +} + + +; Don't fold if we can combine urem with udiv. +define i32 @combine_urem_udiv(i32 %x) { +; RV32I-LABEL: combine_urem_udiv: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: sw s1, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __udivsi3 +; RV32I-NEXT: add a0, s1, a0 +; RV32I-NEXT: lw s1, 4(sp) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: combine_urem_udiv: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 364242 +; RV32IM-NEXT: addi a1, a1, 777 +; RV32IM-NEXT: mulhu a1, a0, a1 +; RV32IM-NEXT: sub a2, a0, a1 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: srli a1, a1, 6 +; RV32IM-NEXT: addi a2, zero, 95 +; RV32IM-NEXT: mul a2, a1, a2 +; RV32IM-NEXT: sub a0, a0, a2 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: combine_urem_udiv: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli s0, a0, 32 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __udivdi3 +; RV64I-NEXT: add a0, s1, a0 +; 
RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: combine_urem_udiv: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a0, a0, 32 +; RV64IM-NEXT: srli a0, a0, 32 +; RV64IM-NEXT: lui a1, 1423 +; RV64IM-NEXT: addiw a1, a1, -733 +; RV64IM-NEXT: slli a1, a1, 15 +; RV64IM-NEXT: addi a1, a1, 1035 +; RV64IM-NEXT: slli a1, a1, 13 +; RV64IM-NEXT: addi a1, a1, -1811 +; RV64IM-NEXT: slli a1, a1, 12 +; RV64IM-NEXT: addi a1, a1, 561 +; RV64IM-NEXT: mulhu a1, a0, a1 +; RV64IM-NEXT: sub a2, a0, a1 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a1, a2, a1 +; RV64IM-NEXT: srli a1, a1, 6 +; RV64IM-NEXT: addi a2, zero, 95 +; RV64IM-NEXT: mul a2, a1, a2 +; RV64IM-NEXT: sub a0, a0, a2 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem i32 %x, 95 + %2 = udiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_urem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_urem_power_of_two: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 63 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %1 = urem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_urem_one(i32 %x) { +; CHECK-LABEL: dont_fold_urem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %1 = urem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^32. 
+define i32 @dont_fold_urem_i32_umax(i32 %x) { +; CHECK-LABEL: dont_fold_urem_i32_umax: +; CHECK: # %bb.0: +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %1 = urem i32 %x, 4294967296 + ret i32 %1 +} + +; Don't fold i64 urem +define i64 @dont_fold_urem_i64(i64 %x) { +; RV32I-LABEL: dont_fold_urem_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a2, zero, 98 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __umoddi3 +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_urem_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -16 +; RV32IM-NEXT: .cfi_def_cfa_offset 16 +; RV32IM-NEXT: sw ra, 12(sp) +; RV32IM-NEXT: .cfi_offset ra, -4 +; RV32IM-NEXT: addi a2, zero, 98 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __umoddi3 +; RV32IM-NEXT: lw ra, 12(sp) +; RV32IM-NEXT: .cfi_restore ra +; RV32IM-NEXT: addi sp, sp, 16 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_urem_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a1, zero, 98 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_urem_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: lui a2, 2675 +; RV64IM-NEXT: addiw a2, a2, -251 +; RV64IM-NEXT: slli a2, a2, 13 +; RV64IM-NEXT: addi a2, a2, 1839 +; RV64IM-NEXT: slli a2, a2, 13 +; RV64IM-NEXT: addi a2, a2, 167 +; RV64IM-NEXT: slli a2, a2, 13 +; RV64IM-NEXT: addi a2, a2, 1505 +; RV64IM-NEXT: mulhu a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 4 +; RV64IM-NEXT: addi a2, zero, 98 +; 
RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll new file mode 100644 index 0000000000000..bab79aeb0ee08 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -0,0 +1,1419 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s + + +define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { +; RV32I-LABEL: fold_urem_vec_1: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: lhu s2, 12(a1) +; RV32I-NEXT: lhu s3, 8(a1) +; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu a2, 0(a1) +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, 124 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 98 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call 
__umodsi3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 1003 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: sh a0, 6(s1) +; RV32I-NEXT: sh s0, 4(s1) +; RV32I-NEXT: sh s5, 2(s1) +; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_urem_vec_1: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lhu a6, 12(a1) +; RV32IM-NEXT: lhu a3, 8(a1) +; RV32IM-NEXT: lhu a4, 0(a1) +; RV32IM-NEXT: lhu a1, 4(a1) +; RV32IM-NEXT: lui a5, 364242 +; RV32IM-NEXT: addi a5, a5, 777 +; RV32IM-NEXT: mulhu a5, a4, a5 +; RV32IM-NEXT: sub a2, a4, a5 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a2, a2, a5 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: addi a5, zero, 95 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: sub a2, a4, a2 +; RV32IM-NEXT: srli a4, a1, 2 +; RV32IM-NEXT: lui a5, 135300 +; RV32IM-NEXT: addi a5, a5, 529 +; RV32IM-NEXT: mulhu a4, a4, a5 +; RV32IM-NEXT: srli a4, a4, 2 +; RV32IM-NEXT: addi a5, zero, 124 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: lui a4, 342392 +; RV32IM-NEXT: addi a4, a4, 669 +; RV32IM-NEXT: mulhu a4, a3, a4 +; RV32IM-NEXT: srli a4, a4, 5 +; RV32IM-NEXT: addi a5, zero, 98 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: lui a4, 267633 +; RV32IM-NEXT: addi a4, a4, -1809 +; RV32IM-NEXT: mulhu a4, a6, a4 +; RV32IM-NEXT: srli a4, a4, 8 +; RV32IM-NEXT: addi a5, zero, 1003 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a4, a6, a4 +; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sh a3, 
4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_urem_vec_1: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: .cfi_def_cfa_offset 64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: sd s1, 40(sp) +; RV64I-NEXT: sd s2, 32(sp) +; RV64I-NEXT: sd s3, 24(sp) +; RV64I-NEXT: sd s4, 16(sp) +; RV64I-NEXT: sd s5, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: lhu s2, 24(a1) +; RV64I-NEXT: lhu s3, 16(a1) +; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: lhu a2, 0(a1) +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, 124 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 98 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 1003 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sh a0, 6(s1) +; RV64I-NEXT: sh s0, 4(s1) +; RV64I-NEXT: sh s5, 2(s1) +; RV64I-NEXT: sh s4, 0(s1) +; RV64I-NEXT: ld s5, 8(sp) +; RV64I-NEXT: ld s4, 16(sp) +; RV64I-NEXT: ld s3, 24(sp) +; RV64I-NEXT: ld s2, 32(sp) +; RV64I-NEXT: ld s1, 40(sp) +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_urem_vec_1: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a6, 24(a1) +; RV64IM-NEXT: lhu a3, 
16(a1) +; RV64IM-NEXT: lhu a4, 8(a1) +; RV64IM-NEXT: lhu a1, 0(a1) +; RV64IM-NEXT: lui a5, 1423 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, -1811 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 561 +; RV64IM-NEXT: mulhu a5, a1, a5 +; RV64IM-NEXT: sub a2, a1, a5 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: addi a5, zero, 95 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a1, a1, a2 +; RV64IM-NEXT: srli a2, a4, 2 +; RV64IM-NEXT: lui a5, 264 +; RV64IM-NEXT: addiw a5, a5, 1057 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1057 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1057 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 133 +; RV64IM-NEXT: mulhu a2, a2, a5 +; RV64IM-NEXT: srli a2, a2, 3 +; RV64IM-NEXT: addi a5, zero, 124 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a2, a4, a2 +; RV64IM-NEXT: srli a4, a3, 1 +; RV64IM-NEXT: lui a5, 2675 +; RV64IM-NEXT: addiw a5, a5, -251 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1839 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 167 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1505 +; RV64IM-NEXT: mulhu a4, a4, a5 +; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: addi a5, zero, 98 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 8364 +; RV64IM-NEXT: addiw a4, a4, -1977 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 1907 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 453 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 1213 +; RV64IM-NEXT: mulhu a4, a6, a4 +; RV64IM-NEXT: srli a4, a4, 7 +; RV64IM-NEXT: addi a5, zero, 1003 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a4, a6, a4 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a3, 4(a0) +; RV64IM-NEXT: sh a2, 2(a0) +; 
RV64IM-NEXT: sh a1, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { +; RV32I-LABEL: fold_urem_vec_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: sw s4, 8(sp) +; RV32I-NEXT: sw s5, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: lhu s2, 12(a1) +; RV32I-NEXT: lhu s3, 8(a1) +; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu a2, 0(a1) +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: sh a0, 6(s1) +; RV32I-NEXT: sh s0, 4(s1) +; RV32I-NEXT: sh s5, 2(s1) +; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: lw s5, 4(sp) +; RV32I-NEXT: lw s4, 8(sp) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: fold_urem_vec_2: +; RV32IM: # %bb.0: +; 
RV32IM-NEXT: lhu a6, 12(a1) +; RV32IM-NEXT: lhu a7, 8(a1) +; RV32IM-NEXT: lhu a4, 0(a1) +; RV32IM-NEXT: lhu a1, 4(a1) +; RV32IM-NEXT: lui a5, 364242 +; RV32IM-NEXT: addi a5, a5, 777 +; RV32IM-NEXT: mulhu a2, a4, a5 +; RV32IM-NEXT: sub a3, a4, a2 +; RV32IM-NEXT: srli a3, a3, 1 +; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: addi a3, zero, 95 +; RV32IM-NEXT: mul a2, a2, a3 +; RV32IM-NEXT: sub t0, a4, a2 +; RV32IM-NEXT: mulhu a4, a1, a5 +; RV32IM-NEXT: sub a2, a1, a4 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a2, a2, a4 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: mul a2, a2, a3 +; RV32IM-NEXT: sub a1, a1, a2 +; RV32IM-NEXT: mulhu a2, a7, a5 +; RV32IM-NEXT: sub a4, a7, a2 +; RV32IM-NEXT: srli a4, a4, 1 +; RV32IM-NEXT: add a2, a4, a2 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: mul a2, a2, a3 +; RV32IM-NEXT: sub a2, a7, a2 +; RV32IM-NEXT: mulhu a4, a6, a5 +; RV32IM-NEXT: sub a5, a6, a4 +; RV32IM-NEXT: srli a5, a5, 1 +; RV32IM-NEXT: add a4, a5, a4 +; RV32IM-NEXT: srli a4, a4, 6 +; RV32IM-NEXT: mul a3, a4, a3 +; RV32IM-NEXT: sub a3, a6, a3 +; RV32IM-NEXT: sh a3, 6(a0) +; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: fold_urem_vec_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: .cfi_def_cfa_offset 64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: sd s1, 40(sp) +; RV64I-NEXT: sd s2, 32(sp) +; RV64I-NEXT: sd s3, 24(sp) +; RV64I-NEXT: sd s4, 16(sp) +; RV64I-NEXT: sd s5, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: lhu s2, 24(a1) +; RV64I-NEXT: lhu s3, 16(a1) +; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: lhu a2, 0(a1) +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi 
a1, zero, 95 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sh a0, 6(s1) +; RV64I-NEXT: sh s0, 4(s1) +; RV64I-NEXT: sh s5, 2(s1) +; RV64I-NEXT: sh s4, 0(s1) +; RV64I-NEXT: ld s5, 8(sp) +; RV64I-NEXT: ld s4, 16(sp) +; RV64I-NEXT: ld s3, 24(sp) +; RV64I-NEXT: ld s2, 32(sp) +; RV64I-NEXT: ld s1, 40(sp) +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: fold_urem_vec_2: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a6, 24(a1) +; RV64IM-NEXT: lhu a7, 16(a1) +; RV64IM-NEXT: lhu a4, 8(a1) +; RV64IM-NEXT: lhu a1, 0(a1) +; RV64IM-NEXT: lui a5, 1423 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, -1811 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 561 +; RV64IM-NEXT: mulhu a2, a1, a5 +; RV64IM-NEXT: sub a3, a1, a2 +; RV64IM-NEXT: srli a3, a3, 1 +; RV64IM-NEXT: add a2, a3, a2 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: addi a3, zero, 95 +; RV64IM-NEXT: mul a2, a2, a3 +; RV64IM-NEXT: sub t0, a1, a2 +; RV64IM-NEXT: mulhu a2, a4, a5 +; RV64IM-NEXT: sub a1, a4, a2 +; RV64IM-NEXT: srli a1, a1, 1 +; RV64IM-NEXT: add a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 6 +; RV64IM-NEXT: mul a1, a1, a3 +; RV64IM-NEXT: sub a1, a4, a1 +; RV64IM-NEXT: mulhu a2, a7, a5 +; RV64IM-NEXT: sub a4, a7, 
a2 +; RV64IM-NEXT: srli a4, a4, 1 +; RV64IM-NEXT: add a2, a4, a2 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: mul a2, a2, a3 +; RV64IM-NEXT: sub a2, a7, a2 +; RV64IM-NEXT: mulhu a4, a6, a5 +; RV64IM-NEXT: sub a5, a6, a4 +; RV64IM-NEXT: srli a5, a5, 1 +; RV64IM-NEXT: add a4, a5, a4 +; RV64IM-NEXT: srli a4, a4, 6 +; RV64IM-NEXT: mul a3, a4, a3 +; RV64IM-NEXT: sub a3, a6, a3 +; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine urem with udiv. +define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { +; RV32I-LABEL: combine_urem_udiv: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: sw s1, 36(sp) +; RV32I-NEXT: sw s2, 32(sp) +; RV32I-NEXT: sw s3, 28(sp) +; RV32I-NEXT: sw s4, 24(sp) +; RV32I-NEXT: sw s5, 20(sp) +; RV32I-NEXT: sw s6, 16(sp) +; RV32I-NEXT: sw s7, 12(sp) +; RV32I-NEXT: sw s8, 8(sp) +; RV32I-NEXT: sw s9, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: .cfi_offset s9, -44 +; RV32I-NEXT: lhu s2, 0(a1) +; RV32I-NEXT: lhu s3, 4(a1) +; RV32I-NEXT: lhu s4, 8(a1) +; RV32I-NEXT: lhu s1, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: 
mv s7, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s8, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __udivsi3 +; RV32I-NEXT: mv s9, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: call __udivsi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __udivsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __udivsi3 +; RV32I-NEXT: add a0, s8, a0 +; RV32I-NEXT: add a1, s7, s1 +; RV32I-NEXT: add a2, s6, s4 +; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: sh a3, 6(s0) +; RV32I-NEXT: sh a2, 4(s0) +; RV32I-NEXT: sh a1, 2(s0) +; RV32I-NEXT: sh a0, 0(s0) +; RV32I-NEXT: lw s9, 4(sp) +; RV32I-NEXT: lw s8, 8(sp) +; RV32I-NEXT: lw s7, 12(sp) +; RV32I-NEXT: lw s6, 16(sp) +; RV32I-NEXT: lw s5, 20(sp) +; RV32I-NEXT: lw s4, 24(sp) +; RV32I-NEXT: lw s3, 28(sp) +; RV32I-NEXT: lw s2, 32(sp) +; RV32I-NEXT: lw s1, 36(sp) +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: .cfi_restore s6 +; RV32I-NEXT: .cfi_restore s7 +; RV32I-NEXT: .cfi_restore s8 +; RV32I-NEXT: .cfi_restore s9 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: combine_urem_udiv: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lhu a6, 0(a1) +; RV32IM-NEXT: lhu a7, 4(a1) +; RV32IM-NEXT: lhu a4, 12(a1) +; RV32IM-NEXT: lhu a1, 8(a1) +; RV32IM-NEXT: lui a5, 364242 +; RV32IM-NEXT: addi a5, a5, 777 +; RV32IM-NEXT: mulhu a2, a4, a5 +; RV32IM-NEXT: sub a3, a4, a2 +; RV32IM-NEXT: srli a3, a3, 1 +; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: srli t3, a2, 6 +; RV32IM-NEXT: addi t0, zero, 95 +; RV32IM-NEXT: mul a3, t3, t0 +; 
RV32IM-NEXT: sub t1, a4, a3 +; RV32IM-NEXT: mulhu a4, a1, a5 +; RV32IM-NEXT: sub a3, a1, a4 +; RV32IM-NEXT: srli a3, a3, 1 +; RV32IM-NEXT: add a3, a3, a4 +; RV32IM-NEXT: srli a3, a3, 6 +; RV32IM-NEXT: mul a4, a3, t0 +; RV32IM-NEXT: sub t2, a1, a4 +; RV32IM-NEXT: mulhu a4, a7, a5 +; RV32IM-NEXT: sub a1, a7, a4 +; RV32IM-NEXT: srli a1, a1, 1 +; RV32IM-NEXT: add a1, a1, a4 +; RV32IM-NEXT: srli a1, a1, 6 +; RV32IM-NEXT: mul a4, a1, t0 +; RV32IM-NEXT: sub a4, a7, a4 +; RV32IM-NEXT: mulhu a5, a6, a5 +; RV32IM-NEXT: sub a2, a6, a5 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a2, a2, a5 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: mul a5, a2, t0 +; RV32IM-NEXT: sub a5, a6, a5 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: add a3, t2, a3 +; RV32IM-NEXT: add a4, t1, t3 +; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) +; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: combine_urem_udiv: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -96 +; RV64I-NEXT: .cfi_def_cfa_offset 96 +; RV64I-NEXT: sd ra, 88(sp) +; RV64I-NEXT: sd s0, 80(sp) +; RV64I-NEXT: sd s1, 72(sp) +; RV64I-NEXT: sd s2, 64(sp) +; RV64I-NEXT: sd s3, 56(sp) +; RV64I-NEXT: sd s4, 48(sp) +; RV64I-NEXT: sd s5, 40(sp) +; RV64I-NEXT: sd s6, 32(sp) +; RV64I-NEXT: sd s7, 24(sp) +; RV64I-NEXT: sd s8, 16(sp) +; RV64I-NEXT: sd s9, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: .cfi_offset s6, -64 +; RV64I-NEXT: .cfi_offset s7, -72 +; RV64I-NEXT: .cfi_offset s8, -80 +; RV64I-NEXT: .cfi_offset s9, -88 +; RV64I-NEXT: lhu s2, 0(a1) +; RV64I-NEXT: lhu s3, 8(a1) +; RV64I-NEXT: lhu s4, 16(a1) +; RV64I-NEXT: lhu s1, 24(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; 
RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s6, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s7, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s8, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __udivdi3 +; RV64I-NEXT: mv s9, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: call __udivdi3 +; RV64I-NEXT: mv s4, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __udivdi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __udivdi3 +; RV64I-NEXT: add a0, s8, a0 +; RV64I-NEXT: add a1, s7, s1 +; RV64I-NEXT: add a2, s6, s4 +; RV64I-NEXT: add a3, s5, s9 +; RV64I-NEXT: sh a3, 6(s0) +; RV64I-NEXT: sh a2, 4(s0) +; RV64I-NEXT: sh a1, 2(s0) +; RV64I-NEXT: sh a0, 0(s0) +; RV64I-NEXT: ld s9, 8(sp) +; RV64I-NEXT: ld s8, 16(sp) +; RV64I-NEXT: ld s7, 24(sp) +; RV64I-NEXT: ld s6, 32(sp) +; RV64I-NEXT: ld s5, 40(sp) +; RV64I-NEXT: ld s4, 48(sp) +; RV64I-NEXT: ld s3, 56(sp) +; RV64I-NEXT: ld s2, 64(sp) +; RV64I-NEXT: ld s1, 72(sp) +; RV64I-NEXT: ld s0, 80(sp) +; RV64I-NEXT: ld ra, 88(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: .cfi_restore s6 +; RV64I-NEXT: .cfi_restore s7 +; RV64I-NEXT: .cfi_restore s8 +; RV64I-NEXT: .cfi_restore s9 +; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: combine_urem_udiv: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a6, 0(a1) +; RV64IM-NEXT: lhu a7, 8(a1) +; RV64IM-NEXT: lhu a4, 16(a1) +; RV64IM-NEXT: lhu a1, 
24(a1) +; RV64IM-NEXT: lui a5, 1423 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, -1811 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 561 +; RV64IM-NEXT: mulhu a2, a1, a5 +; RV64IM-NEXT: sub a3, a1, a2 +; RV64IM-NEXT: srli a3, a3, 1 +; RV64IM-NEXT: add a2, a3, a2 +; RV64IM-NEXT: srli t3, a2, 6 +; RV64IM-NEXT: addi t0, zero, 95 +; RV64IM-NEXT: mul a3, t3, t0 +; RV64IM-NEXT: sub t1, a1, a3 +; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: sub a1, a4, a3 +; RV64IM-NEXT: srli a1, a1, 1 +; RV64IM-NEXT: add a1, a1, a3 +; RV64IM-NEXT: srli a1, a1, 6 +; RV64IM-NEXT: mul a3, a1, t0 +; RV64IM-NEXT: sub t2, a4, a3 +; RV64IM-NEXT: mulhu a4, a7, a5 +; RV64IM-NEXT: sub a3, a7, a4 +; RV64IM-NEXT: srli a3, a3, 1 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli a3, a3, 6 +; RV64IM-NEXT: mul a4, a3, t0 +; RV64IM-NEXT: sub a4, a7, a4 +; RV64IM-NEXT: mulhu a5, a6, a5 +; RV64IM-NEXT: sub a2, a6, a5 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: srli a2, a2, 6 +; RV64IM-NEXT: mul a5, a2, t0 +; RV64IM-NEXT: sub a5, a6, a5 +; RV64IM-NEXT: add a2, a5, a2 +; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: add a1, t2, a1 +; RV64IM-NEXT: add a4, t1, t3 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i16> %x, + %2 = udiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { +; RV32I-LABEL: dont_fold_urem_power_of_two: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: lhu s2, 8(a1) +; RV32I-NEXT: lhu s3, 4(a1) +; RV32I-NEXT: lhu s1, 0(a1) +; RV32I-NEXT: lhu a2, 12(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 95 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: andi a1, s1, 63 +; RV32I-NEXT: andi a2, s3, 31 +; RV32I-NEXT: andi a3, s2, 7 +; RV32I-NEXT: sh a0, 6(s0) +; RV32I-NEXT: sh a3, 4(s0) +; RV32I-NEXT: sh a2, 2(s0) +; RV32I-NEXT: sh a1, 0(s0) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_urem_power_of_two: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lhu a6, 8(a1) +; RV32IM-NEXT: lhu a3, 4(a1) +; RV32IM-NEXT: lhu a4, 12(a1) +; RV32IM-NEXT: lhu a1, 0(a1) +; RV32IM-NEXT: lui a5, 364242 +; RV32IM-NEXT: addi a5, a5, 777 +; RV32IM-NEXT: mulhu a5, a4, a5 +; RV32IM-NEXT: sub a2, a4, a5 +; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: add a2, a2, a5 +; RV32IM-NEXT: srli a2, a2, 6 +; RV32IM-NEXT: addi a5, zero, 95 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: sub a2, a4, a2 +; RV32IM-NEXT: andi a1, a1, 63 +; RV32IM-NEXT: andi a3, a3, 31 +; RV32IM-NEXT: andi a4, a6, 7 +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; 
RV32IM-NEXT: sh a1, 0(a0) +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_urem_power_of_two: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: lhu s2, 16(a1) +; RV64I-NEXT: lhu s3, 8(a1) +; RV64I-NEXT: lhu s1, 0(a1) +; RV64I-NEXT: lhu a2, 24(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 95 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: andi a1, s1, 63 +; RV64I-NEXT: andi a2, s3, 31 +; RV64I-NEXT: andi a3, s2, 7 +; RV64I-NEXT: sh a0, 6(s0) +; RV64I-NEXT: sh a3, 4(s0) +; RV64I-NEXT: sh a2, 2(s0) +; RV64I-NEXT: sh a1, 0(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_urem_power_of_two: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a6, 16(a1) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a4, 0(a1) +; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: lui a5, 1423 +; RV64IM-NEXT: addiw a5, a5, -733 +; RV64IM-NEXT: slli a5, a5, 15 +; RV64IM-NEXT: addi a5, a5, 1035 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, -1811 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 561 +; RV64IM-NEXT: mulhu a5, a1, a5 +; RV64IM-NEXT: sub a2, a1, a5 +; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: srli a2, a2, 6 
+; RV64IM-NEXT: addi a5, zero, 95 +; RV64IM-NEXT: mul a2, a2, a5 +; RV64IM-NEXT: sub a1, a1, a2 +; RV64IM-NEXT: andi a2, a4, 63 +; RV64IM-NEXT: andi a3, a3, 31 +; RV64IM-NEXT: andi a4, a6, 7 +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a1, 6(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. +define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { +; RV32I-LABEL: dont_fold_urem_one: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) +; RV32I-NEXT: sw s0, 24(sp) +; RV32I-NEXT: sw s1, 20(sp) +; RV32I-NEXT: sw s2, 16(sp) +; RV32I-NEXT: sw s3, 12(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: lhu s2, 12(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu a2, 4(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a1, zero, 654 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: addi a1, zero, 23 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a1, a0, 1327 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: lw s3, 12(sp) +; RV32I-NEXT: lw s2, 16(sp) +; RV32I-NEXT: lw s1, 20(sp) +; RV32I-NEXT: lw s0, 24(sp) +; RV32I-NEXT: lw ra, 28(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_urem_one: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lhu a2, 4(a1) +; 
RV32IM-NEXT: lhu a3, 12(a1) +; RV32IM-NEXT: lhu a1, 8(a1) +; RV32IM-NEXT: srli a4, a2, 1 +; RV32IM-NEXT: lui a5, 820904 +; RV32IM-NEXT: addi a5, a5, -1903 +; RV32IM-NEXT: mulhu a4, a4, a5 +; RV32IM-NEXT: srli a4, a4, 8 +; RV32IM-NEXT: addi a5, zero, 654 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: lui a4, 729444 +; RV32IM-NEXT: addi a4, a4, 713 +; RV32IM-NEXT: mulhu a4, a1, a4 +; RV32IM-NEXT: srli a4, a4, 4 +; RV32IM-NEXT: addi a5, zero, 23 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a1, a1, a4 +; RV32IM-NEXT: lui a4, 395996 +; RV32IM-NEXT: addi a4, a4, -2009 +; RV32IM-NEXT: mulhu a4, a3, a4 +; RV32IM-NEXT: srli a4, a4, 11 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: addi a5, a5, 1327 +; RV32IM-NEXT: mul a4, a4, a5 +; RV32IM-NEXT: sub a3, a3, a4 +; RV32IM-NEXT: sh zero, 0(a0) +; RV32IM-NEXT: sh a3, 6(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a2, 2(a0) +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_urem_one: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: lhu s2, 24(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu a2, 8(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 654 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: addi a1, zero, 23 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a1, a0, 1327 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: 
ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_urem_one: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lhu a2, 24(a1) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a1, 16(a1) +; RV64IM-NEXT: lui a4, 3206 +; RV64IM-NEXT: addiw a4, a4, -1781 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1069 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1959 +; RV64IM-NEXT: slli a4, a4, 14 +; RV64IM-NEXT: addi a4, a4, 713 +; RV64IM-NEXT: mulhu a4, a1, a4 +; RV64IM-NEXT: sub a5, a1, a4 +; RV64IM-NEXT: srli a5, a5, 1 +; RV64IM-NEXT: add a4, a5, a4 +; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: addi a5, zero, 23 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a1, a1, a4 +; RV64IM-NEXT: srli a4, a3, 1 +; RV64IM-NEXT: lui a5, 6413 +; RV64IM-NEXT: addiw a5, a5, 1265 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1027 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1077 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 965 +; RV64IM-NEXT: mulhu a4, a4, a5 +; RV64IM-NEXT: srli a4, a4, 7 +; RV64IM-NEXT: addi a5, zero, 654 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 1044567 +; RV64IM-NEXT: addiw a4, a4, -575 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 883 +; RV64IM-NEXT: slli a4, a4, 14 +; RV64IM-NEXT: addi a4, a4, -861 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -179 +; RV64IM-NEXT: mulhu a4, a2, a4 +; RV64IM-NEXT: srli a4, a4, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: sh zero, 0(a0) +; RV64IM-NEXT: sh a2, 
6(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^16. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_urem_i16_smax: +; CHECK: # %bb.0: +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 urem. +define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { +; RV32I-LABEL: dont_fold_urem_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) +; RV32I-NEXT: sw s0, 40(sp) +; RV32I-NEXT: sw s1, 36(sp) +; RV32I-NEXT: sw s2, 32(sp) +; RV32I-NEXT: sw s3, 28(sp) +; RV32I-NEXT: sw s4, 24(sp) +; RV32I-NEXT: sw s5, 20(sp) +; RV32I-NEXT: sw s6, 16(sp) +; RV32I-NEXT: sw s7, 12(sp) +; RV32I-NEXT: sw s8, 8(sp) +; RV32I-NEXT: sw s9, 4(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: .cfi_offset s9, -44 +; RV32I-NEXT: lw s2, 24(a1) +; RV32I-NEXT: lw s3, 28(a1) +; RV32I-NEXT: lw s4, 16(a1) +; RV32I-NEXT: lw s5, 20(a1) +; RV32I-NEXT: lw s6, 8(a1) +; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a1, 4(a1) +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a2, zero, 1 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __umoddi3 +; RV32I-NEXT: mv s7, a0 +; RV32I-NEXT: mv s8, a1 +; RV32I-NEXT: addi a2, zero, 654 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __umoddi3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s9, a1 +; RV32I-NEXT: addi a2, zero, 23 +; RV32I-NEXT: mv 
a0, s4 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __umoddi3 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __umoddi3 +; RV32I-NEXT: sw a1, 28(s0) +; RV32I-NEXT: sw a0, 24(s0) +; RV32I-NEXT: sw s1, 20(s0) +; RV32I-NEXT: sw s4, 16(s0) +; RV32I-NEXT: sw s9, 12(s0) +; RV32I-NEXT: sw s6, 8(s0) +; RV32I-NEXT: sw s8, 4(s0) +; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: lw s9, 4(sp) +; RV32I-NEXT: lw s8, 8(sp) +; RV32I-NEXT: lw s7, 12(sp) +; RV32I-NEXT: lw s6, 16(sp) +; RV32I-NEXT: lw s5, 20(sp) +; RV32I-NEXT: lw s4, 24(sp) +; RV32I-NEXT: lw s3, 28(sp) +; RV32I-NEXT: lw s2, 32(sp) +; RV32I-NEXT: lw s1, 36(sp) +; RV32I-NEXT: lw s0, 40(sp) +; RV32I-NEXT: lw ra, 44(sp) +; RV32I-NEXT: .cfi_restore ra +; RV32I-NEXT: .cfi_restore s0 +; RV32I-NEXT: .cfi_restore s1 +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: .cfi_restore s6 +; RV32I-NEXT: .cfi_restore s7 +; RV32I-NEXT: .cfi_restore s8 +; RV32I-NEXT: .cfi_restore s9 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: dont_fold_urem_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: .cfi_def_cfa_offset 48 +; RV32IM-NEXT: sw ra, 44(sp) +; RV32IM-NEXT: sw s0, 40(sp) +; RV32IM-NEXT: sw s1, 36(sp) +; RV32IM-NEXT: sw s2, 32(sp) +; RV32IM-NEXT: sw s3, 28(sp) +; RV32IM-NEXT: sw s4, 24(sp) +; RV32IM-NEXT: sw s5, 20(sp) +; RV32IM-NEXT: sw s6, 16(sp) +; RV32IM-NEXT: sw s7, 12(sp) +; RV32IM-NEXT: sw s8, 8(sp) +; RV32IM-NEXT: sw s9, 4(sp) +; RV32IM-NEXT: .cfi_offset ra, -4 +; RV32IM-NEXT: .cfi_offset s0, -8 +; RV32IM-NEXT: .cfi_offset s1, -12 +; RV32IM-NEXT: .cfi_offset s2, -16 +; RV32IM-NEXT: .cfi_offset s3, -20 +; RV32IM-NEXT: .cfi_offset s4, -24 +; RV32IM-NEXT: .cfi_offset s5, -28 +; 
RV32IM-NEXT: .cfi_offset s6, -32 +; RV32IM-NEXT: .cfi_offset s7, -36 +; RV32IM-NEXT: .cfi_offset s8, -40 +; RV32IM-NEXT: .cfi_offset s9, -44 +; RV32IM-NEXT: lw s2, 24(a1) +; RV32IM-NEXT: lw s3, 28(a1) +; RV32IM-NEXT: lw s4, 16(a1) +; RV32IM-NEXT: lw s5, 20(a1) +; RV32IM-NEXT: lw s6, 8(a1) +; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw a3, 0(a1) +; RV32IM-NEXT: lw a1, 4(a1) +; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: addi a2, zero, 1 +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __umoddi3 +; RV32IM-NEXT: mv s7, a0 +; RV32IM-NEXT: mv s8, a1 +; RV32IM-NEXT: addi a2, zero, 654 +; RV32IM-NEXT: mv a0, s6 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __umoddi3 +; RV32IM-NEXT: mv s6, a0 +; RV32IM-NEXT: mv s9, a1 +; RV32IM-NEXT: addi a2, zero, 23 +; RV32IM-NEXT: mv a0, s4 +; RV32IM-NEXT: mv a1, s5 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __umoddi3 +; RV32IM-NEXT: mv s4, a0 +; RV32IM-NEXT: mv s1, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s2 +; RV32IM-NEXT: mv a1, s3 +; RV32IM-NEXT: mv a3, zero +; RV32IM-NEXT: call __umoddi3 +; RV32IM-NEXT: sw a1, 28(s0) +; RV32IM-NEXT: sw a0, 24(s0) +; RV32IM-NEXT: sw s1, 20(s0) +; RV32IM-NEXT: sw s4, 16(s0) +; RV32IM-NEXT: sw s9, 12(s0) +; RV32IM-NEXT: sw s6, 8(s0) +; RV32IM-NEXT: sw s8, 4(s0) +; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: lw s9, 4(sp) +; RV32IM-NEXT: lw s8, 8(sp) +; RV32IM-NEXT: lw s7, 12(sp) +; RV32IM-NEXT: lw s6, 16(sp) +; RV32IM-NEXT: lw s5, 20(sp) +; RV32IM-NEXT: lw s4, 24(sp) +; RV32IM-NEXT: lw s3, 28(sp) +; RV32IM-NEXT: lw s2, 32(sp) +; RV32IM-NEXT: lw s1, 36(sp) +; RV32IM-NEXT: lw s0, 40(sp) +; RV32IM-NEXT: lw ra, 44(sp) +; RV32IM-NEXT: .cfi_restore ra +; RV32IM-NEXT: .cfi_restore s0 +; RV32IM-NEXT: .cfi_restore s1 +; RV32IM-NEXT: .cfi_restore s2 +; RV32IM-NEXT: .cfi_restore s3 +; RV32IM-NEXT: .cfi_restore s4 +; RV32IM-NEXT: .cfi_restore s5 +; RV32IM-NEXT: .cfi_restore s6 +; RV32IM-NEXT: .cfi_restore s7 +; 
RV32IM-NEXT: .cfi_restore s8 +; RV32IM-NEXT: .cfi_restore s9 +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: .cfi_def_cfa_offset 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: dont_fold_urem_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) +; RV64I-NEXT: sd s0, 32(sp) +; RV64I-NEXT: sd s1, 24(sp) +; RV64I-NEXT: sd s2, 16(sp) +; RV64I-NEXT: sd s3, 8(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld s1, 16(a1) +; RV64I-NEXT: ld a2, 8(a1) +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: addi a1, zero, 654 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: addi a1, zero, 23 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a1, a0, 1327 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s0) +; RV64I-NEXT: sd s1, 16(s0) +; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: ld s3, 8(sp) +; RV64I-NEXT: ld s2, 16(sp) +; RV64I-NEXT: ld s1, 24(sp) +; RV64I-NEXT: ld s0, 32(sp) +; RV64I-NEXT: ld ra, 40(sp) +; RV64I-NEXT: .cfi_restore ra +; RV64I-NEXT: .cfi_restore s0 +; RV64I-NEXT: .cfi_restore s1 +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: dont_fold_urem_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: ld a2, 24(a1) +; RV64IM-NEXT: ld a3, 8(a1) +; RV64IM-NEXT: ld a1, 16(a1) +; RV64IM-NEXT: lui a4, 3206 +; RV64IM-NEXT: addiw a4, a4, -1781 +; RV64IM-NEXT: slli a4, a4, 13 +; RV64IM-NEXT: addi a4, a4, 1069 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -1959 +; RV64IM-NEXT: slli a4, a4, 14 +; RV64IM-NEXT: addi a4, a4, 713 +; RV64IM-NEXT: mulhu a4, a1, a4 +; 
RV64IM-NEXT: sub a5, a1, a4 +; RV64IM-NEXT: srli a5, a5, 1 +; RV64IM-NEXT: add a4, a5, a4 +; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: addi a5, zero, 23 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a1, a1, a4 +; RV64IM-NEXT: srli a4, a3, 1 +; RV64IM-NEXT: lui a5, 6413 +; RV64IM-NEXT: addiw a5, a5, 1265 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1027 +; RV64IM-NEXT: slli a5, a5, 13 +; RV64IM-NEXT: addi a5, a5, 1077 +; RV64IM-NEXT: slli a5, a5, 12 +; RV64IM-NEXT: addi a5, a5, 965 +; RV64IM-NEXT: mulhu a4, a4, a5 +; RV64IM-NEXT: srli a4, a4, 7 +; RV64IM-NEXT: addi a5, zero, 654 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a3, a3, a4 +; RV64IM-NEXT: lui a4, 1044567 +; RV64IM-NEXT: addiw a4, a4, -575 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, 883 +; RV64IM-NEXT: slli a4, a4, 14 +; RV64IM-NEXT: addi a4, a4, -861 +; RV64IM-NEXT: slli a4, a4, 12 +; RV64IM-NEXT: addi a4, a4, -179 +; RV64IM-NEXT: mulhu a4, a2, a4 +; RV64IM-NEXT: srli a4, a4, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a4, a4, a5 +; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: sd zero, 0(a0) +; RV64IM-NEXT: sd a2, 24(a0) +; RV64IM-NEXT: sd a3, 8(a0) +; RV64IM-NEXT: sd a1, 16(a0) +; RV64IM-NEXT: .cfi_def_cfa_offset 0 +; RV64IM-NEXT: ret + %1 = urem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/X86/srem-lkk.ll b/llvm/test/CodeGen/X86/srem-lkk.ll new file mode 100644 index 0000000000000..ae30ae4463a93 --- /dev/null +++ b/llvm/test/CodeGen/X86/srem-lkk.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK + +define i32 @fold_srem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_srem_positive_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: imulq $-1401515643, %rax, %rcx # imm = 0xAC769185 +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: addl %eax, %ecx +; 
CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shrl $31, %edx +; CHECK-NEXT: sarl $6, %ecx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $95, %ecx, %ecx +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_srem_positive_even(i32 %x) { +; CHECK-LABEL: fold_srem_positive_even: +; CHECK: # %bb.0: +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: imulq $1037275121, %rax, %rcx # imm = 0x3DD38FF1 +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: shrq $63, %rdx +; CHECK-NEXT: sarq $40, %rcx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $1060, %ecx, %ecx # imm = 0x424 +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, 1060 + ret i32 %1 +} + + +define i32 @fold_srem_negative_odd(i32 %x) { +; CHECK-LABEL: fold_srem_negative_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: imulq $-1520762971, %rax, %rcx # imm = 0xA55AFFA5 +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: shrq $63, %rdx +; CHECK-NEXT: sarq $40, %rcx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $-723, %ecx, %ecx # imm = 0xFD2D +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, -723 + ret i32 %1 +} + + +define i32 @fold_srem_negative_even(i32 %x) { +; CHECK-LABEL: fold_srem_negative_even: +; CHECK: # %bb.0: +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: imulq $-47844377, %rax, %rcx # imm = 0xFD25F3E7 +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: shrq $63, %rdx +; CHECK-NEXT: sarq $40, %rcx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $-22981, %ecx, %ecx # imm = 0xA63B +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, -22981 + ret i32 %1 +} + + +; Don't fold if we can combine srem with sdiv. 
+define i32 @combine_srem_sdiv(i32 %x) { +; CHECK-LABEL: combine_srem_sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: imulq $-1401515643, %rax, %rcx # imm = 0xAC769185 +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shrl $31, %edx +; CHECK-NEXT: sarl $6, %ecx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: imull $95, %ecx, %edx +; CHECK-NEXT: subl %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, 95 + %2 = sdiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_srem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_srem_power_of_two: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: leal 63(%rax), %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsl %edi, %ecx +; CHECK-NEXT: andl $-64, %ecx +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %1 = srem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_srem_one(i32 %x) { +; CHECK-LABEL: dont_fold_srem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq + %1 = srem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^31. 
+define i32 @dont_fold_srem_i32_smax(i32 %x) { +; CHECK-LABEL: dont_fold_srem_i32_smax: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 2147483647(%rdi), %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: cmovnsl %edi, %eax +; CHECK-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: retq + %1 = srem i32 %x, 2147483648 + ret i32 %1 +} + +; Don't fold i64 srem +define i64 @dont_fold_srem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_srem_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $6023426636313322977, %rcx # imm = 0x5397829CBC14E5E1 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: imulq %rcx +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: shrq $63, %rax +; CHECK-NEXT: sarq $5, %rdx +; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: imulq $98, %rdx, %rax +; CHECK-NEXT: subq %rax, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq + %1 = srem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/X86/srem-vector-lkk.ll b/llvm/test/CodeGen/X86/srem-vector-lkk.ll new file mode 100644 index 0000000000000..19f7932747687 --- /dev/null +++ b/llvm/test/CodeGen/X86/srem-vector-lkk.ll @@ -0,0 +1,556 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 + +define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { +; SSE-LABEL: fold_srem_vec_1: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $32081, %ecx, %ecx # imm = 0x7D51 +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: subl %eax, %ecx +; SSE-NEXT: movzwl %cx, %ecx +; SSE-NEXT: movswl %cx, %edx 
+; SSE-NEXT: shrl $15, %ecx +; SSE-NEXT: sarl $9, %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: imull $-1003, %edx, %ecx # imm = 0xFC15 +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: movd %xmm0, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: imull $-21385, %edx, %edx # imm = 0xAC77 +; SSE-NEXT: shrl $16, %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: movzwl %dx, %edx +; SSE-NEXT: movswl %dx, %esi +; SSE-NEXT: shrl $15, %edx +; SSE-NEXT: sarl $6, %esi +; SSE-NEXT: addl %edx, %esi +; SSE-NEXT: imull $95, %esi, %edx +; SSE-NEXT: subl %edx, %ecx +; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: pextrw $1, %xmm0, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: imull $-16913, %edx, %edx # imm = 0xBDEF +; SSE-NEXT: movl %edx, %esi +; SSE-NEXT: shrl $31, %esi +; SSE-NEXT: sarl $21, %edx +; SSE-NEXT: addl %esi, %edx +; SSE-NEXT: imull $-124, %edx, %edx +; SSE-NEXT: subl %edx, %ecx +; SSE-NEXT: pinsrw $1, %ecx, %xmm1 +; SSE-NEXT: pextrw $2, %xmm0, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: imull $2675, %edx, %edx # imm = 0xA73 +; SSE-NEXT: movl %edx, %esi +; SSE-NEXT: shrl $31, %esi +; SSE-NEXT: sarl $18, %edx +; SSE-NEXT: addl %esi, %edx +; SSE-NEXT: imull $98, %edx, %edx +; SSE-NEXT: subl %edx, %ecx +; SSE-NEXT: pinsrw $2, %ecx, %xmm1 +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fold_srem_vec_1: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $32081, %ecx, %ecx # imm = 0x7D51 +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: subl %eax, %ecx +; AVX-NEXT: movzwl %cx, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: shrl $15, %ecx +; AVX-NEXT: sarl $9, %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: imull $-1003, %edx, %ecx # imm = 0xFC15 +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vmovd %xmm0, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: imull $-21385, %edx, %edx # imm = 0xAC77 +; AVX-NEXT: shrl $16, %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: movzwl %dx, %edx +; AVX-NEXT: 
movswl %dx, %esi +; AVX-NEXT: shrl $15, %edx +; AVX-NEXT: sarl $6, %esi +; AVX-NEXT: addl %edx, %esi +; AVX-NEXT: imull $95, %esi, %edx +; AVX-NEXT: subl %edx, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpextrw $1, %xmm0, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: imull $-16913, %edx, %edx # imm = 0xBDEF +; AVX-NEXT: movl %edx, %esi +; AVX-NEXT: shrl $31, %esi +; AVX-NEXT: sarl $21, %edx +; AVX-NEXT: addl %esi, %edx +; AVX-NEXT: imull $-124, %edx, %edx +; AVX-NEXT: subl %edx, %ecx +; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: imull $2675, %edx, %edx # imm = 0xA73 +; AVX-NEXT: movl %edx, %esi +; AVX-NEXT: shrl $31, %esi +; AVX-NEXT: sarl $18, %edx +; AVX-NEXT: addl %esi, %edx +; AVX-NEXT: imull $98, %edx, %edx +; AVX-NEXT: subl %edx, %ecx +; AVX-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm0 +; AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { +; SSE-LABEL: fold_srem_vec_2: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151] +; SSE-NEXT: pmulhw %xmm0, %xmm1 +; SSE-NEXT: paddw %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: psrlw $15, %xmm2 +; SSE-NEXT: psraw $6, %xmm1 +; SSE-NEXT: paddw %xmm2, %xmm1 +; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1 +; SSE-NEXT: psubw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fold_srem_vec_2: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2 +; AVX-NEXT: vpsraw $6, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine srem with sdiv. 
+define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { +; SSE-LABEL: combine_srem_sdiv: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151] +; SSE-NEXT: pmulhw %xmm0, %xmm1 +; SSE-NEXT: paddw %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: psrlw $15, %xmm2 +; SSE-NEXT: psraw $6, %xmm1 +; SSE-NEXT: paddw %xmm2, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [95,95,95,95,95,95,95,95] +; SSE-NEXT: pmullw %xmm1, %xmm2 +; SSE-NEXT: psubw %xmm2, %xmm0 +; SSE-NEXT: paddw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_srem_sdiv: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2 +; AVX-NEXT: vpsraw $6, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + %2 = sdiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { +; SSE-LABEL: dont_fold_srem_power_of_two: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $1, %xmm0, %eax +; SSE-NEXT: leal 31(%rax), %ecx +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: cmovnsl %eax, %ecx +; SSE-NEXT: andl $-32, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: movd %xmm0, %ecx +; SSE-NEXT: leal 63(%rcx), %edx +; SSE-NEXT: testw %cx, %cx +; SSE-NEXT: cmovnsl %ecx, %edx +; SSE-NEXT: andl $-64, %edx +; SSE-NEXT: subl %edx, %ecx +; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: pinsrw $1, %eax, %xmm1 +; SSE-NEXT: pextrw $2, %xmm0, %eax +; SSE-NEXT: leal 7(%rax), %ecx +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: cmovnsl %eax, %ecx +; SSE-NEXT: andl $-8, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $2, %eax, %xmm1 +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $-21385, %ecx, %ecx # imm = 0xAC77 +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: movzwl %cx, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: shrl $15, %ecx +; SSE-NEXT: sarl $6, %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: imull $95, %edx, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: dont_fold_srem_power_of_two: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $1, %xmm0, %eax +; AVX-NEXT: leal 31(%rax), %ecx +; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: cmovnsl %eax, %ecx +; AVX-NEXT: andl $-32, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vmovd %xmm0, %ecx +; AVX-NEXT: leal 63(%rcx), %edx +; AVX-NEXT: testw %cx, %cx +; AVX-NEXT: cmovnsl %ecx, %edx +; AVX-NEXT: andl $-64, %edx +; AVX-NEXT: subl %edx, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; AVX-NEXT: leal 7(%rax), %ecx +; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: cmovnsl %eax, %ecx +; AVX-NEXT: andl $-8, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; 
AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $-21385, %ecx, %ecx # imm = 0xAC77 +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: addl %eax, %ecx +; AVX-NEXT: movzwl %cx, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: shrl $15, %ecx +; AVX-NEXT: sarl $6, %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: imull $95, %edx, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. +define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { +; SSE-LABEL: dont_fold_srem_one: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $2, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $-19945, %ecx, %ecx # imm = 0xB217 +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: movzwl %cx, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: shrl $15, %ecx +; SSE-NEXT: sarl $4, %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: leal (%rdx,%rdx,2), %ecx +; SSE-NEXT: shll $3, %ecx +; SSE-NEXT: subl %ecx, %edx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: pextrw $1, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $12827, %ecx, %ecx # imm = 0x321B +; SSE-NEXT: movl %ecx, %esi +; SSE-NEXT: shrl $31, %esi +; SSE-NEXT: sarl $23, %ecx +; SSE-NEXT: addl %esi, %ecx +; SSE-NEXT: imull $654, %ecx, %ecx # imm = 0x28E +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pinsrw $1, %eax, %xmm1 +; SSE-NEXT: pinsrw $2, %edx, %xmm1 +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $12375, %ecx, %ecx # imm = 0x3057 +; SSE-NEXT: movl %ecx, %edx +; SSE-NEXT: shrl $31, %edx +; SSE-NEXT: sarl $26, %ecx +; SSE-NEXT: addl %edx, %ecx +; SSE-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: dont_fold_srem_one: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; 
AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $-19945, %ecx, %ecx # imm = 0xB217 +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: addl %eax, %ecx +; AVX-NEXT: movzwl %cx, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: shrl $15, %ecx +; AVX-NEXT: sarl $4, %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: leal (%rdx,%rdx,2), %ecx +; AVX-NEXT: shll $3, %ecx +; AVX-NEXT: subl %ecx, %edx +; AVX-NEXT: addl %eax, %edx +; AVX-NEXT: vpextrw $1, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $12827, %ecx, %ecx # imm = 0x321B +; AVX-NEXT: movl %ecx, %esi +; AVX-NEXT: shrl $31, %esi +; AVX-NEXT: sarl $23, %ecx +; AVX-NEXT: addl %esi, %ecx +; AVX-NEXT: imull $654, %ecx, %ecx # imm = 0x28E +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $2, %edx, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $12375, %ecx, %ecx # imm = 0x3057 +; AVX-NEXT: movl %ecx, %edx +; AVX-NEXT: shrl $31, %edx +; AVX-NEXT: sarl $26, %ecx +; AVX-NEXT: addl %edx, %ecx +; AVX-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^15. 
+define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; SSE-LABEL: dont_fold_urem_i16_smax: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $2, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $-19945, %ecx, %ecx # imm = 0xB217 +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: movzwl %cx, %ecx +; SSE-NEXT: movswl %cx, %edx +; SSE-NEXT: shrl $15, %ecx +; SSE-NEXT: sarl $4, %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: leal (%rdx,%rdx,2), %ecx +; SSE-NEXT: shll $3, %ecx +; SSE-NEXT: subl %ecx, %edx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: pextrw $1, %xmm0, %eax +; SSE-NEXT: leal 32767(%rax), %ecx +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: cmovnsl %eax, %ecx +; SSE-NEXT: andl $-32768, %ecx # imm = 0x8000 +; SSE-NEXT: addl %eax, %ecx +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pinsrw $1, %ecx, %xmm1 +; SSE-NEXT: pinsrw $2, %edx, %xmm1 +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: movswl %ax, %ecx +; SSE-NEXT: imull $12375, %ecx, %ecx # imm = 0x3057 +; SSE-NEXT: movl %ecx, %edx +; SSE-NEXT: shrl $31, %edx +; SSE-NEXT: sarl $26, %ecx +; SSE-NEXT: addl %edx, %ecx +; SSE-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: dont_fold_urem_i16_smax: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $-19945, %ecx, %ecx # imm = 0xB217 +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: addl %eax, %ecx +; AVX-NEXT: movzwl %cx, %ecx +; AVX-NEXT: movswl %cx, %edx +; AVX-NEXT: shrl $15, %ecx +; AVX-NEXT: sarl $4, %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: leal (%rdx,%rdx,2), %ecx +; AVX-NEXT: shll $3, %ecx +; AVX-NEXT: subl %ecx, %edx +; AVX-NEXT: addl %eax, %edx +; AVX-NEXT: vpextrw $1, %xmm0, %eax +; AVX-NEXT: leal 32767(%rax), %ecx +; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: cmovnsl %eax, %ecx +; AVX-NEXT: andl $-32768, %ecx # imm = 0x8000 +; AVX-NEXT: addl %eax, %ecx +; AVX-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $2, %edx, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: movswl %ax, %ecx +; AVX-NEXT: imull $12375, %ecx, %ecx # imm = 0x3057 +; AVX-NEXT: movl %ecx, %edx +; AVX-NEXT: shrl $31, %edx +; AVX-NEXT: sarl $26, %ecx +; AVX-NEXT: addl %edx, %ecx +; AVX-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: retq + %1 = srem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 srem. +define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { +; SSE-LABEL: dont_fold_srem_i64: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: movq %xmm1, %rcx +; SSE-NEXT: movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: imulq %rdx +; SSE-NEXT: addq %rcx, %rdx +; SSE-NEXT: movq %rdx, %rax +; SSE-NEXT: shrq $63, %rax +; SSE-NEXT: sarq $4, %rdx +; SSE-NEXT: addq %rax, %rdx +; SSE-NEXT: leaq (%rdx,%rdx,2), %rax +; SSE-NEXT: shlq $3, %rax +; SSE-NEXT: subq %rax, %rdx +; SSE-NEXT: addq %rcx, %rdx +; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: pextrq $1, %xmm2, %rcx +; SSE-NEXT: movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: imulq %rdx +; SSE-NEXT: movq %rdx, %rax +; SSE-NEXT: shrq $63, %rax +; SSE-NEXT: sarq $11, %rdx +; SSE-NEXT: addq %rax, %rdx +; SSE-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; SSE-NEXT: subq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm2 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: pextrq $1, %xmm0, %rcx +; SSE-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: imulq %rdx +; SSE-NEXT: movq %rdx, %rax +; SSE-NEXT: shrq $63, %rax +; SSE-NEXT: sarq $8, %rdx +; SSE-NEXT: addq %rax, %rdx +; SSE-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; SSE-NEXT: subq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm0 +; 
SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; SSE-NEXT: retq +; +; AVX1-LABEL: dont_fold_srem_i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rcx +; AVX1-NEXT: movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq $4, %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (%rdx,%rdx,2), %rax +; AVX1-NEXT: shlq $3, %rax +; AVX1-NEXT: subq %rax, %rdx +; AVX1-NEXT: addq %rcx, %rdx +; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: vpextrq $1, %xmm1, %rcx +; AVX1-NEXT: movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rdx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq $11, %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx +; AVX1-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rdx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq $8, %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: dont_fold_srem_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovq %xmm1, %rcx +; AVX2-NEXT: movabsq $-5614226457215950491, %rdx # imm = 0xB21642C8590B2165 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rdx +; AVX2-NEXT: addq %rcx, 
%rdx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq $4, %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (%rdx,%rdx,2), %rax +; AVX2-NEXT: shlq $3, %rax +; AVX2-NEXT: subq %rax, %rdx +; AVX2-NEXT: addq %rcx, %rdx +; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; AVX2-NEXT: movabsq $6966426675817289639, %rdx # imm = 0x60ADB826E5E517A7 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rdx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq $11, %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx +; AVX2-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rdx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: sarq $8, %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq + %1 = srem <4 x i64> %x, + ret <4 x i64> %1 +} diff --git a/llvm/test/CodeGen/X86/urem-lkk.ll b/llvm/test/CodeGen/X86/urem-lkk.ll new file mode 100644 index 0000000000000..84b0d0d68a0af --- /dev/null +++ b/llvm/test/CodeGen/X86/urem-lkk.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK + +define i32 @fold_urem_positive_odd(i32 %x) { +; CHECK-LABEL: fold_urem_positive_odd: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: imulq $1491936009, %rcx, %rcx # imm = 0x58ED2309 +; CHECK-NEXT: shrq $32, %rcx +; 
CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: subl %ecx, %edx +; CHECK-NEXT: shrl %edx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: shrl $6, %edx +; CHECK-NEXT: imull $95, %edx, %ecx +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: retq + %1 = urem i32 %x, 95 + ret i32 %1 +} + + +define i32 @fold_urem_positive_even(i32 %x) { +; CHECK-LABEL: fold_urem_positive_even: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: movl $4149100483, %edx # imm = 0xF74E3FC3 +; CHECK-NEXT: imulq %rcx, %rdx +; CHECK-NEXT: shrq $42, %rdx +; CHECK-NEXT: imull $1060, %edx, %ecx # imm = 0x424 +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: retq + %1 = urem i32 %x, 1060 + ret i32 %1 +} + + +; Don't fold if we can combine urem with udiv. +define i32 @combine_urem_udiv(i32 %x) { +; CHECK-LABEL: combine_urem_udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: imulq $1491936009, %rax, %rax # imm = 0x58ED2309 +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: shrl %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: shrl $6, %ecx +; CHECK-NEXT: imull $95, %ecx, %eax +; CHECK-NEXT: subl %eax, %edi +; CHECK-NEXT: leal (%rdi,%rcx), %eax +; CHECK-NEXT: retq + %1 = urem i32 %x, 95 + %2 = udiv i32 %x, 95 + %3 = add i32 %1, %2 + ret i32 %3 +} + +; Don't fold for divisors that are a power of two. +define i32 @dont_fold_urem_power_of_two(i32 %x) { +; CHECK-LABEL: dont_fold_urem_power_of_two: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $63, %eax +; CHECK-NEXT: retq + %1 = urem i32 %x, 64 + ret i32 %1 +} + +; Don't fold if the divisor is one. +define i32 @dont_fold_urem_one(i32 %x) { +; CHECK-LABEL: dont_fold_urem_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq + %1 = urem i32 %x, 1 + ret i32 %1 +} + +; Don't fold if the divisor is 2^32. 
+define i32 @dont_fold_urem_i32_umax(i32 %x) { +; CHECK-LABEL: dont_fold_urem_i32_umax: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = urem i32 %x, 4294967296 + ret i32 %1 +} + +; Don't fold i64 urem +define i64 @dont_fold_urem_i64(i64 %x) { +; CHECK-LABEL: dont_fold_urem_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq %rax +; CHECK-NEXT: movabsq $6023426636313322977, %rcx # imm = 0x5397829CBC14E5E1 +; CHECK-NEXT: mulq %rcx +; CHECK-NEXT: shrq $4, %rdx +; CHECK-NEXT: imulq $98, %rdx, %rax +; CHECK-NEXT: subq %rax, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq + %1 = urem i64 %x, 98 + ret i64 %1 +} diff --git a/llvm/test/CodeGen/X86/urem-vector-lkk.ll b/llvm/test/CodeGen/X86/urem-vector-lkk.ll new file mode 100644 index 0000000000000..65eb3557f4ce0 --- /dev/null +++ b/llvm/test/CodeGen/X86/urem-vector-lkk.ll @@ -0,0 +1,378 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 + +define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { +; SSE-LABEL: fold_urem_vec_1: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $1, %xmm0, %eax +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: shrl $2, %ecx +; SSE-NEXT: imull $16913, %ecx, %ecx # imm = 0x4211 +; SSE-NEXT: shrl $19, %ecx +; SSE-NEXT: imull $124, %ecx, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: movd %xmm0, %ecx +; SSE-NEXT: movzwl %cx, %edx +; SSE-NEXT: imull $44151, %edx, %edx # imm = 0xAC77 +; SSE-NEXT: shrl $22, %edx +; SSE-NEXT: imull $95, %edx, %edx +; SSE-NEXT: subl %edx, %ecx +; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: pinsrw $1, %eax, %xmm1 +; SSE-NEXT: pextrw $2, %xmm0, %eax +; 
SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: shrl %ecx +; SSE-NEXT: imull $2675, %ecx, %ecx # imm = 0xA73 +; SSE-NEXT: shrl $17, %ecx +; SSE-NEXT: imull $98, %ecx, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $2, %eax, %xmm1 +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: imull $1373, %eax, %ecx # imm = 0x55D +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: movl %eax, %edx +; SSE-NEXT: subl %ecx, %edx +; SSE-NEXT: movzwl %dx, %edx +; SSE-NEXT: shrl %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: shrl $9, %edx +; SSE-NEXT: imull $1003, %edx, %ecx # imm = 0x3EB +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fold_urem_vec_1: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $1, %xmm0, %eax +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: shrl $2, %ecx +; AVX-NEXT: imull $16913, %ecx, %ecx # imm = 0x4211 +; AVX-NEXT: shrl $19, %ecx +; AVX-NEXT: imull $124, %ecx, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vmovd %xmm0, %ecx +; AVX-NEXT: movzwl %cx, %edx +; AVX-NEXT: imull $44151, %edx, %edx # imm = 0xAC77 +; AVX-NEXT: shrl $22, %edx +; AVX-NEXT: imull $95, %edx, %edx +; AVX-NEXT: subl %edx, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; AVX-NEXT: movl %eax, %ecx +; AVX-NEXT: shrl %ecx +; AVX-NEXT: imull $2675, %ecx, %ecx # imm = 0xA73 +; AVX-NEXT: shrl $17, %ecx +; AVX-NEXT: imull $98, %ecx, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: imull $1373, %eax, %ecx # imm = 0x55D +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: movl %eax, %edx +; AVX-NEXT: subl %ecx, %edx +; AVX-NEXT: movzwl %dx, %edx +; AVX-NEXT: shrl %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: shrl $9, %edx +; AVX-NEXT: imull $1003, %edx, %ecx # imm = 0x3EB +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: retq + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + 
+define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { +; SSE-LABEL: fold_urem_vec_2: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151] +; SSE-NEXT: pmulhuw %xmm0, %xmm1 +; SSE-NEXT: psrlw $6, %xmm1 +; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1 +; SSE-NEXT: psubw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: fold_urem_vec_2: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpsrlw $6, %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + + +; Don't fold if we can combine urem with udiv. +define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { +; SSE-LABEL: combine_urem_udiv: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151] +; SSE-NEXT: pmulhuw %xmm0, %xmm1 +; SSE-NEXT: psrlw $6, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [95,95,95,95,95,95,95,95] +; SSE-NEXT: pmullw %xmm1, %xmm2 +; SSE-NEXT: psubw %xmm2, %xmm0 +; SSE-NEXT: paddw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_urem_udiv: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpsrlw $6, %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = urem <4 x i16> %x, + %2 = udiv <4 x i16> %x, + %3 = add <4 x i16> %1, %2 + ret <4 x i16> %3 +} + +; Don't fold for divisors that are a power of two. 
+define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { +; SSE-LABEL: dont_fold_urem_power_of_two: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: imull $44151, %eax, %ecx # imm = 0xAC77 +; SSE-NEXT: shrl $22, %ecx +; SSE-NEXT: imull $95, %ecx, %ecx +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pextrw $1, %xmm0, %ecx +; SSE-NEXT: andl $31, %ecx +; SSE-NEXT: movd %xmm0, %edx +; SSE-NEXT: andl $63, %edx +; SSE-NEXT: movd %edx, %xmm1 +; SSE-NEXT: pinsrw $1, %ecx, %xmm1 +; SSE-NEXT: pextrw $2, %xmm0, %ecx +; SSE-NEXT: andl $7, %ecx +; SSE-NEXT: pinsrw $2, %ecx, %xmm1 +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: dont_fold_urem_power_of_two: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: imull $44151, %eax, %ecx # imm = 0xAC77 +; AVX-NEXT: shrl $22, %ecx +; AVX-NEXT: imull $95, %ecx, %ecx +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpextrw $1, %xmm0, %ecx +; AVX-NEXT: andl $31, %ecx +; AVX-NEXT: vmovd %xmm0, %edx +; AVX-NEXT: andl $63, %edx +; AVX-NEXT: vmovd %edx, %xmm1 +; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $2, %xmm0, %ecx +; AVX-NEXT: andl $7, %ecx +; AVX-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm0 +; AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is one. 
+define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { +; SSE-LABEL: dont_fold_urem_one: +; SSE: # %bb.0: +; SSE-NEXT: pextrw $2, %xmm0, %eax +; SSE-NEXT: imull $25645, %eax, %ecx # imm = 0x642D +; SSE-NEXT: shrl $16, %ecx +; SSE-NEXT: movl %eax, %edx +; SSE-NEXT: subl %ecx, %edx +; SSE-NEXT: movzwl %dx, %edx +; SSE-NEXT: shrl %edx +; SSE-NEXT: addl %ecx, %edx +; SSE-NEXT: shrl $4, %edx +; SSE-NEXT: leal (%rdx,%rdx,2), %ecx +; SSE-NEXT: shll $3, %ecx +; SSE-NEXT: subl %ecx, %edx +; SSE-NEXT: addl %eax, %edx +; SSE-NEXT: pextrw $1, %xmm0, %eax +; SSE-NEXT: imull $51307, %eax, %ecx # imm = 0xC86B +; SSE-NEXT: shrl $25, %ecx +; SSE-NEXT: imull $654, %ecx, %ecx # imm = 0x28E +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pinsrw $1, %eax, %xmm1 +; SSE-NEXT: pinsrw $2, %edx, %xmm1 +; SSE-NEXT: pextrw $3, %xmm0, %eax +; SSE-NEXT: imull $12375, %eax, %ecx # imm = 0x3057 +; SSE-NEXT: shrl $26, %ecx +; SSE-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; SSE-NEXT: subl %ecx, %eax +; SSE-NEXT: pinsrw $3, %eax, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: dont_fold_urem_one: +; AVX: # %bb.0: +; AVX-NEXT: vpextrw $2, %xmm0, %eax +; AVX-NEXT: imull $25645, %eax, %ecx # imm = 0x642D +; AVX-NEXT: shrl $16, %ecx +; AVX-NEXT: movl %eax, %edx +; AVX-NEXT: subl %ecx, %edx +; AVX-NEXT: movzwl %dx, %edx +; AVX-NEXT: shrl %edx +; AVX-NEXT: addl %ecx, %edx +; AVX-NEXT: shrl $4, %edx +; AVX-NEXT: leal (%rdx,%rdx,2), %ecx +; AVX-NEXT: shll $3, %ecx +; AVX-NEXT: subl %ecx, %edx +; AVX-NEXT: addl %eax, %edx +; AVX-NEXT: vpextrw $1, %xmm0, %eax +; AVX-NEXT: imull $51307, %eax, %ecx # imm = 0xC86B +; AVX-NEXT: shrl $25, %ecx +; AVX-NEXT: imull $654, %ecx, %ecx # imm = 0x28E +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vpinsrw $2, %edx, %xmm1, %xmm1 +; AVX-NEXT: vpextrw $3, %xmm0, %eax +; AVX-NEXT: imull $12375, %eax, %ecx # imm = 0x3057 +; AVX-NEXT: shrl $26, %ecx 
+; AVX-NEXT: imull $5423, %ecx, %ecx # imm = 0x152F +; AVX-NEXT: subl %ecx, %eax +; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: retq + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold if the divisor is 2^16. +define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { +; CHECK-LABEL: dont_fold_urem_i16_smax: +; CHECK: # %bb.0: +; CHECK-NEXT: retq + %1 = urem <4 x i16> %x, + ret <4 x i16> %1 +} + +; Don't fold i64 urem. +define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { +; SSE-LABEL: dont_fold_urem_i64: +; SSE: # %bb.0: +; SSE-NEXT: movq %xmm1, %rcx +; SSE-NEXT: movabsq $7218291159277650633, %rdx # imm = 0x642C8590B21642C9 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: mulq %rdx +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: subq %rdx, %rax +; SSE-NEXT: shrq %rax +; SSE-NEXT: addq %rdx, %rax +; SSE-NEXT: shrq $4, %rax +; SSE-NEXT: leaq (%rax,%rax,2), %rdx +; SSE-NEXT: shlq $3, %rdx +; SSE-NEXT: subq %rdx, %rax +; SSE-NEXT: addq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm2 +; SSE-NEXT: pextrq $1, %xmm1, %rcx +; SSE-NEXT: movabsq $-4513890722074972339, %rdx # imm = 0xC15B704DCBCA2F4D +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: mulq %rdx +; SSE-NEXT: shrq $12, %rdx +; SSE-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; SSE-NEXT: subq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; SSE-NEXT: pextrq $1, %xmm0, %rcx +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: shrq %rax +; SSE-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; SSE-NEXT: mulq %rdx +; SSE-NEXT: shrq $7, %rdx +; SSE-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; SSE-NEXT: subq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; SSE-NEXT: movdqa %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: dont_fold_urem_i64: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovq %xmm1, %rcx +; AVX1-NEXT: movabsq 
$7218291159277650633, %rdx # imm = 0x642C8590B21642C9 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rdx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: shrq $4, %rax +; AVX1-NEXT: leaq (%rax,%rax,2), %rdx +; AVX1-NEXT: shlq $3, %rdx +; AVX1-NEXT: subq %rdx, %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vpextrq $1, %xmm1, %rcx +; AVX1-NEXT: movabsq $-4513890722074972339, %rdx # imm = 0xC15B704DCBCA2F4D +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: mulq %rdx +; AVX1-NEXT: shrq $12, %rdx +; AVX1-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: shrq %rax +; AVX1-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; AVX1-NEXT: mulq %rdx +; AVX1-NEXT: shrq $7, %rdx +; AVX1-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; AVX1-NEXT: subq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: dont_fold_urem_i64: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovq %xmm1, %rcx +; AVX2-NEXT: movabsq $7218291159277650633, %rdx # imm = 0x642C8590B21642C9 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: mulq %rdx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: shrq $4, %rax +; AVX2-NEXT: leaq (%rax,%rax,2), %rdx +; AVX2-NEXT: shlq $3, %rdx +; AVX2-NEXT: subq %rdx, %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm2 +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; AVX2-NEXT: movabsq $-4513890722074972339, %rdx # imm = 0xC15B704DCBCA2F4D +; AVX2-NEXT: movq %rcx, %rax +; 
AVX2-NEXT: mulq %rdx +; AVX2-NEXT: shrq $12, %rdx +; AVX2-NEXT: imulq $5423, %rdx, %rax # imm = 0x152F +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: movabsq $7220743857598845893, %rdx # imm = 0x64353C48064353C5 +; AVX2-NEXT: mulq %rdx +; AVX2-NEXT: shrq $7, %rdx +; AVX2-NEXT: imulq $654, %rdx, %rax # imm = 0x28E +; AVX2-NEXT: subq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm0 +; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq + %1 = urem <4 x i64> %x, + ret <4 x i64> %1 +} \ No newline at end of file From 68f21b360b91df88b87aebec5c5dbc5d68d42a6d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 16:08:17 +0000 Subject: [PATCH 043/254] Try to fix sphinx indentation error llvm-svn: 373831 --- clang/docs/ReleaseNotes.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 95dd51e634755..d6c734a53a1d0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -212,13 +212,15 @@ clang-format - The ``Standard`` style option specifies which version of C++ should be used when parsing and formatting C++ code. The set of allowed values has changed: + - ``Latest`` will always enable new C++ language features. - ``c++03``, ``c++11``, ``c++14``, ``c++17``, ``c++20`` will pin to exactly that language version. - ``Auto`` is the default and detects style from the code (this is unchanged). - The previous values of ``Cpp03`` and ``Cpp11`` are deprecated. Note that - ``Cpp11`` is treated as ``Latest``, as this was always clang-format's behavior. - (One motivation for this change is the new name describes the behavior better). 
+ + The previous values of ``Cpp03`` and ``Cpp11`` are deprecated. Note that + ``Cpp11`` is treated as ``Latest``, as this was always clang-format's behavior. + (One motivation for this change is the new name describes the behavior better). libclang -------- From 9ecacb0d54fb89dc7e6da66d9ecae934ca5c01d4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 16:08:30 +0000 Subject: [PATCH 044/254] [X86] lowerShuffleAsLanePermuteAndRepeatedMask - variable renames. NFCI. Rename some variables to match lowerShuffleAsRepeatedMaskAndLanePermute - prep work toward adding some equivalent sublane functionality. llvm-svn: 373832 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 09d90ff4e7ec5..0c565e5e64fc1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14907,26 +14907,26 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (is128BitLaneRepeatedShuffleMask(VT, Mask)) return SDValue(); - int Size = Mask.size(); + int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; - int LaneSize = 128 / VT.getScalarSizeInBits(); - SmallVector RepeatMask(LaneSize, -1); + int NumLaneElts = 128 / VT.getScalarSizeInBits(); + SmallVector RepeatMask(NumLaneElts, -1); SmallVector, 2> LaneSrcs(NumLanes, {{-1, -1}}); // First pass will try to fill in the RepeatMask from lanes that need two // sources. 
for (int Lane = 0; Lane != NumLanes; ++Lane) { - int Srcs[2] = { -1, -1 }; - SmallVector InLaneMask(LaneSize, -1); - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + int Srcs[2] = {-1, -1}; + SmallVector InLaneMask(NumLaneElts, -1); + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // Determine which of the possible input lanes (NumLanes from each source) // this element comes from. Assign that as one of the sources for this // lane. We can assign up to 2 sources for this lane. If we run out // sources we can't do anything. - int LaneSrc = M / LaneSize; + int LaneSrc = M / NumLaneElts; int Src; if (Srcs[0] < 0 || Srcs[0] == LaneSrc) Src = 0; @@ -14936,7 +14936,7 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); Srcs[Src] = LaneSrc; - InLaneMask[i] = (M % LaneSize) + Src * Size; + InLaneMask[i] = (M % NumLaneElts) + Src * NumElts; } // If this lane has two sources, see if it fits with the repeat mask so far. @@ -14992,23 +14992,23 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (LaneSrcs[Lane][0] >= 0) continue; - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // If RepeatMask isn't defined yet we can define it ourself. 
if (RepeatMask[i] < 0) - RepeatMask[i] = M % LaneSize; + RepeatMask[i] = M % NumLaneElts; - if (RepeatMask[i] < Size) { - if (RepeatMask[i] != M % LaneSize) + if (RepeatMask[i] < NumElts) { + if (RepeatMask[i] != M % NumLaneElts) return SDValue(); - LaneSrcs[Lane][0] = M / LaneSize; + LaneSrcs[Lane][0] = M / NumLaneElts; } else { - if (RepeatMask[i] != ((M % LaneSize) + Size)) + if (RepeatMask[i] != ((M % NumLaneElts) + NumElts)) return SDValue(); - LaneSrcs[Lane][1] = M / LaneSize; + LaneSrcs[Lane][1] = M / NumLaneElts; } } @@ -15016,14 +15016,14 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); } - SmallVector NewMask(Size, -1); + SmallVector NewMask(NumElts, -1); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][0]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -15036,11 +15036,11 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][1]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -15051,12 +15051,12 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( cast(NewV2)->getMask() == Mask) return SDValue(); - for (int i = 0; i != Size; ++i) { - NewMask[i] = RepeatMask[i % LaneSize]; + for (int i = 0; i != NumElts; ++i) { + NewMask[i] = RepeatMask[i % NumLaneElts]; if (NewMask[i] < 0) continue; - NewMask[i] += (i / LaneSize) * LaneSize; + NewMask[i] += (i / NumLaneElts) * NumLaneElts; } return DAG.getVectorShuffle(VT, 
DL, NewV1, NewV2, NewMask); } From e2321bb4488a81b87742f3343e3bdf8e161aa35b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 5 Oct 2019 18:03:58 +0000 Subject: [PATCH 045/254] [SLP] avoid reduction transform on patterns that the backend can load-combine I don't see an ideal solution to these 2 related, potentially large, perf regressions: https://bugs.llvm.org/show_bug.cgi?id=42708 https://bugs.llvm.org/show_bug.cgi?id=43146 We decided that load combining was unsuitable for IR because it could obscure other optimizations in IR. So we removed the LoadCombiner pass and deferred to the backend. Therefore, preventing SLP from destroying load combine opportunities requires that it recognizes patterns that could be combined later, but not do the optimization itself ( it's not a vector combine anyway, so it's probably out-of-scope for SLP). Here, we add a scalar cost model adjustment with a conservative pattern match and cost summation for a multi-instruction sequence that can probably be reduced later. This should prevent SLP from creating a vector reduction unless that sequence is extremely cheap. In the x86 tests shown (and discussed in more detail in the bug reports), SDAG combining will produce a single instruction on these tests like: movbe rax, qword ptr [rdi] or: mov rax, qword ptr [rdi] Not some (half) vector monstrosity as we currently do using SLP: vpmovzxbq ymm0, dword ptr [rdi + 1] # ymm0 = mem[0],zero,zero,.. 
vpsllvq ymm0, ymm0, ymmword ptr [rip + .LCPI0_0] movzx eax, byte ptr [rdi] movzx ecx, byte ptr [rdi + 5] shl rcx, 40 movzx edx, byte ptr [rdi + 6] shl rdx, 48 or rdx, rcx movzx ecx, byte ptr [rdi + 7] shl rcx, 56 or rcx, rdx or rcx, rax vextracti128 xmm1, ymm0, 1 vpor xmm0, xmm0, xmm1 vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1] vpor xmm0, xmm0, xmm1 vmovq rax, xmm0 or rax, rcx vzeroupper ret Differential Revision: https://reviews.llvm.org/D67841 llvm-svn: 373833 --- .../llvm/Analysis/TargetTransformInfo.h | 10 ++ llvm/lib/Analysis/TargetTransformInfo.cpp | 53 ++++++ .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +- .../SLPVectorizer/X86/bad-reduction.ll | 156 ++++++++++++------ 4 files changed, 179 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 6da2d7f43bc42..67e62f6f29bcb 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1129,6 +1129,16 @@ class TargetTransformInfo { /// Returns -1 if the cost is unknown. int getInstructionThroughput(const Instruction *I) const; + /// Given an input value that is an element of an 'or' reduction, check if the + /// reduction is composed of narrower loaded values. Assuming that a + /// legal-sized reduction of shifted/zexted loaded values can be load combined + /// in the backend, create a relative cost that accounts for the removal of + /// the intermediate ops and replacement by a single wide load. + /// TODO: If load combining is allowed in the IR optimizer, this analysis + /// may not be necessary. + Optional getLoadCombineCost(unsigned Opcode, + ArrayRef Args) const; + /// The abstract base class used to type erase specific TTI /// implementations. 
class Concept; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index f3d20ce984dbd..6730aa86a99a1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -571,11 +571,64 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { return OpInfo; } +Optional +TargetTransformInfo::getLoadCombineCost(unsigned Opcode, + ArrayRef Args) const { + if (Opcode != Instruction::Or) + return llvm::None; + if (Args.empty()) + return llvm::None; + + // Look past the reduction to find a source value. Arbitrarily follow the + // path through operand 0 of any 'or'. Also, peek through optional + // shift-left-by-constant. + const Value *ZextLoad = Args.front(); + while (match(ZextLoad, m_Or(m_Value(), m_Value())) || + match(ZextLoad, m_Shl(m_Value(), m_Constant()))) + ZextLoad = cast(ZextLoad)->getOperand(0); + + // Check if the input to the reduction is an extended load. + Value *LoadPtr; + if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) + return llvm::None; + + // Require that the total load bit width is a legal integer type. + // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target. + // But <16 x i8> --> i128 is not, so the backend probably can't reduce it. + Type *WideType = ZextLoad->getType(); + Type *EltType = LoadPtr->getType()->getPointerElementType(); + unsigned WideWidth = WideType->getIntegerBitWidth(); + unsigned EltWidth = EltType->getIntegerBitWidth(); + if (!isTypeLegal(WideType) || WideWidth % EltWidth != 0) + return llvm::None; + + // Calculate relative cost: {narrow load+zext+shl+or} are assumed to be + // removed and replaced by a single wide load. + // FIXME: This is not accurate for the larger pattern where we replace + // multiple narrow load sequences with just 1 wide load. We could + // remove the addition of the wide load cost here and expect the caller + // to make an adjustment for that. 
+ int Cost = 0; + Cost -= getMemoryOpCost(Instruction::Load, EltType, 0, 0); + Cost -= getCastInstrCost(Instruction::ZExt, WideType, EltType); + Cost -= getArithmeticInstrCost(Instruction::Shl, WideType); + Cost -= getArithmeticInstrCost(Instruction::Or, WideType); + Cost += getMemoryOpCost(Instruction::Load, WideType, 0, 0); + return Cost; +} + + int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args) const { + // Check if we can match this instruction as part of a larger pattern. + Optional LoadCombineCost = getLoadCombineCost(Opcode, Args); + if (LoadCombineCost) + return LoadCombineCost.getValue(); + + // Fallback to implementation-specific overrides or base class. int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 99428c6c5dee3..ad12646bdeee6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6499,10 +6499,19 @@ class HorizontalReduction { int ScalarReduxCost = 0; switch (ReductionData.getKind()) { - case RK_Arithmetic: - ScalarReduxCost = - TTI->getArithmeticInstrCost(ReductionData.getOpcode(), ScalarTy); + case RK_Arithmetic: { + // Note: Passing in the reduction operands allows the cost model to match + // load combining patterns for this reduction. 
+ auto *ReduxInst = cast(ReductionRoot); + SmallVector OperandList; + for (Value *Operand : ReduxInst->operands()) + OperandList.push_back(Operand); + ScalarReduxCost = TTI->getArithmeticInstrCost(ReductionData.getOpcode(), + ScalarTy, TargetTransformInfo::OK_AnyValue, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, OperandList); break; + } case RK_Min: case RK_Max: case RK_UMin: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index e3452e194dbfb..c44a8524edfe5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -15,31 +15,37 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <4 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] +; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] +; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] +; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] ; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] ; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] ; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] ; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 +; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 +; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 +; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64 ; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64 ; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64 ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64 ; CHECK-NEXT: [[Z7:%.*]] = 
zext i8 [[T7]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <4 x i64> [[TMP3]], +; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56 +; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48 +; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40 +; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32 ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[SH4]] -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]] -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]] -; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]] -; CHECK-NEXT: ret i64 [[OP_EXTRA]] +; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]] +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]] +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]] +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]] +; CHECK-NEXT: ret i64 [[OR01234567]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 @@ -97,18 +103,38 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr 
inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] +; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] +; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] +; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] +; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] +; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] +; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] +; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] +; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 +; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 +; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 +; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64 +; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64 +; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64 +; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64 +; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64 +; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56 +; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48 +; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40 +; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32 +; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 +; CHECK-NEXT: 
[[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 +; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 +; CHECK-NEXT: [[SH7:%.*]] = shl nuw nsw i64 [[Z7]], 0 +; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]] +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]] +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]] +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[SH7]] +; CHECK-NEXT: ret i64 [[OR01234567]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 @@ -168,30 +194,36 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 ; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G1]] to <4 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1 +; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1 +; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1 +; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1 ; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1 ; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1 ; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1 ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64 +; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64 +; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64 +; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64 ; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64 ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64 ; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64 
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw <4 x i64> [[TMP3]], +; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24 +; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32 ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[S5]] -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]] -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]] -; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]] -; CHECK-NEXT: ret i64 [[OP_EXTRA]] +; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[Z0]] +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]] +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]] +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]] +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] +; CHECK-NEXT: ret i64 [[O7]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 %g2 = getelementptr inbounds i8, i8* %arg, i64 2 @@ -247,18 +279,38 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], 
align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> -; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1 +; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1 +; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1 +; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1 +; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1 +; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1 +; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1 +; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1 +; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64 +; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64 +; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64 +; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64 +; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64 +; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64 +; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64 +; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64 +; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 0 +; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8 +; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16 +; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24 +; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32 +; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 +; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 
[[Z6]], 48 +; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 +; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S0]] +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]] +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]] +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]] +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] +; CHECK-NEXT: ret i64 [[O7]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 %g2 = getelementptr inbounds i8, i8* %arg, i64 2 From 8815be04ec1f333564591d9593735f22efa9bee5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Oct 2019 20:49:34 +0000 Subject: [PATCH 046/254] [X86][AVX] Push sign extensions of comparison bool results through bitops (PR42025) As discussed on PR42025, with more complex boolean math we can end up with many truncations/extensions of the comparison results through each bitop. This patch handles the cases introduced in combineBitcastvxi1 by pushing the sign extension through the AND/OR/XOR ops so its just the original SETCC ops that gets extended. Differential Revision: https://reviews.llvm.org/D68226 llvm-svn: 373834 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 32 +++- .../test/CodeGen/X86/bitcast-and-setcc-256.ll | 161 ++++++++---------- .../test/CodeGen/X86/bitcast-and-setcc-512.ll | 124 +++++++------- 3 files changed, 157 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0c565e5e64fc1..6f535617f1a76 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35121,6 +35121,23 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) { return false; } +// Helper to push sign extension of vXi1 SETCC result through bitops. 
+static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, + SDValue Src, const SDLoc &DL) { + switch (Src.getOpcode()) { + case ISD::SETCC: + return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + case ISD::AND: + case ISD::XOR: + case ISD::OR: + return DAG.getNode( + Src.getOpcode(), DL, SExtVT, + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); + } + llvm_unreachable("Unexpected node type for vXi1 sign extension"); +} + // Try to match patterns such as // (i16 bitcast (v16i1 x)) // -> @@ -35159,6 +35176,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) MVT SExtVT; + bool PropagateSExt = false; switch (SrcVT.getSimpleVT().SimpleTy) { default: return SDValue(); @@ -35169,8 +35187,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, SExtVT = MVT::v4i32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. - if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) + if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) { SExtVT = MVT::v4i64; + PropagateSExt = true; + } break; case MVT::v8i1: SExtVT = MVT::v8i16; @@ -35179,11 +35199,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. 
- // TODO : use checkBitcastSrcVectorSize - if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - (Src.getOperand(0).getValueType().is256BitVector() || - Src.getOperand(0).getValueType().is512BitVector())) { + if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) || + checkBitcastSrcVectorSize(Src, 512))) { SExtVT = MVT::v8i32; + PropagateSExt = true; } break; case MVT::v16i1: @@ -35206,7 +35225,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, return SDValue(); }; - SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) + : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) { V = getPMOVMSKB(DL, V, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll index b982cde2a957b..e50dca9646567 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -55,17 +55,17 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; ; AVX1-LABEL: v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtq 
%xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskpd %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -73,9 +73,9 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; ; AVX2-LABEL: v4i64: ; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskpd %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -121,9 +121,9 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> ; ; AVX12-LABEL: v4f64: ; AVX12: # %bb.0: +; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm2 ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1 -; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0 +; AVX12-NEXT: vandpd %ymm2, %ymm0, %ymm0 ; AVX12-NEXT: vmovmskpd %ymm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper @@ -241,32 +241,28 @@ define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) { ; ; AVX1-LABEL: v8i32_and: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 +; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vinsertf128 
$1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: v8i32_and: ; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -314,32 +310,28 @@ define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) { ; ; AVX1-LABEL: v8i32_or: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 +; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtd 
%xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: v8i32_or: ; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -395,42 +387,36 @@ define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, ; ; AVX1-LABEL: v8i32_or_and: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm6 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm6, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 +; AVX1-NEXT: vpcmpgtd %xmm6, %xmm7, %xmm6 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3 +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpcmpeqd 
%xmm5, %xmm4, %xmm2 -; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: v8i32_or_and: ; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -482,13 +468,10 @@ define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> ; ; AVX12-LABEL: v8f32_and: ; AVX12: # %bb.0: +; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1 -; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0 -; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX12-NEXT: vpmovmskb %xmm0, %eax +; AVX12-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX12-NEXT: vmovmskps %ymm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper ; AVX12-NEXT: retq @@ -536,13 +519,10 @@ define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> ; ; AVX12-LABEL: v8f32_xor: ; AVX12: # %bb.0: +; AVX12-NEXT: 
vcmpltps %ymm2, %ymm3, %ymm2 ; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1 -; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0 -; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX12-NEXT: vpmovmskb %xmm0, %eax +; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0 +; AVX12-NEXT: vmovmskps %ymm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper ; AVX12-NEXT: retq @@ -604,17 +584,12 @@ define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x fl ; ; AVX12-LABEL: v8f32_xor_and: ; AVX12: # %bb.0: +; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm2 ; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0 -; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm1 -; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1 -; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX12-NEXT: vpmovmskb %xmm0, %eax +; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX12-NEXT: vmovmskps %ymm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper ; AVX12-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll index 9384b24ab28b9..29499848eb480 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -50,56 +50,45 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; ; AVX1-LABEL: v8i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9 +; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm9 ; AVX1-NEXT: vpcmpgtq %xmm8, 
%xmm9, %xmm8 +; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 +; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm4 +; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm4, %ymm4 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm8 +; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm8, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2 -; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3 -; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: v8i64: ; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm5 +; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, 
%ymm4 +; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpmovmskb %xmm0, %eax +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -171,29 +160,42 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq ; -; AVX12-LABEL: v8f64: -; AVX12: # %bb.0: -; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1 -; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 -; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 -; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1 -; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2 -; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX12-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; AVX12-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 -; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vpsllw $15, %xmm0, 
%xmm0 -; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX12-NEXT: vpmovmskb %xmm0, %eax -; AVX12-NEXT: # kill: def $al killed $al killed $eax -; AVX12-NEXT: vzeroupper -; AVX12-NEXT: retq +; AVX1-LABEL: v8f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5 +; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7 +; AVX1-NEXT: vpackssdw %xmm7, %xmm5, %xmm5 +; AVX1-NEXT: vcmpltpd %ymm4, %ymm6, %ymm4 +; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6 +; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4 +; AVX1-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vmovmskps %ymm0, %eax +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: v8f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5 +; AVX2-NEXT: vcmpltpd %ymm4, %ymm6, %ymm4 +; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 +; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovmskps %ymm0, %eax +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; ; AVX512F-LABEL: v8f64: ; AVX512F: # %bb.0: From 2decdf42b95a8bdcbd33cd73e82a4efc76b91494 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Oct 2019 23:21:17 +0000 Subject: [PATCH 047/254] [FastISel] Copy the inline assembly dialect to the INLINEASM instruction. Fixes PR43575. 
llvm-svn: 373836 --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 1 + llvm/test/CodeGen/X86/pr43575.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr43575.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 8904283ff8dae..0fd2bd7815ba1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1303,6 +1303,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) diff --git a/llvm/test/CodeGen/X86/pr43575.ll b/llvm/test/CodeGen/X86/pr43575.ll new file mode 100644 index 0000000000000..00c70c6970972 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr43575.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -O0 | FileCheck %s + +define void @exit(i32 %status) +; CHECK-LABEL: exit: +; CHECK: ## %bb.0: +; CHECK: ## InlineAsm Start +; CHECK: movq $60, %rax +; CHECK: syscall +; CHECK: ## InlineAsm End +; CHECK: retq +{ + call void asm sideeffect inteldialect "mov rax, 60; syscall", ""() + ret void +} From 69c65a86097f11450a50af0c8213a0ee47983145 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:37:34 +0000 Subject: [PATCH 048/254] AMDGPU/GlobalISel: Fix RegBankSelect for sendmsg intrinsics This wasn't updated for the immarg handling change. 
llvm-svn: 373837 --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 11 +++++------ .../GlobalISel/regbankselect-amdgcn.s.sendmsg.mir | 13 ++++--------- .../regbankselect-amdgcn.s.sendmsghalt.mir | 15 +++++---------- .../AMDGPU/GlobalISel/regbankselect-constant.mir | 8 ++------ 4 files changed, 16 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ba0820ac41142..ad63532439cb4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -307,16 +307,16 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // FIXME: Should have no register for immediate - static const OpRegBankEntry<2> Table[2] = { + static const OpRegBankEntry<1> Table[2] = { // Perfectly legal. - { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 }, + { { AMDGPU::SGPRRegBankID }, 1 }, // Need readlane - { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 } + { { AMDGPU::VGPRRegBankID }, 3 } }; - const std::array RegSrcOpIdx = { { 1, 2 } }; - return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); + const std::array RegSrcOpIdx = { { 2 } }; + return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); } default: return RegisterBankInfo::getInstrAlternativeMappings(MI); @@ -2780,7 +2780,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // This must be an SGPR, but accept a VGPR. 
unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, AMDGPU::SGPRRegBankID); - OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir index eaff3354b9849..b021fb7992b79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsg.mir @@ -11,11 +11,9 @@ body: | liveins: $sgpr0 ; CHECK-LABEL: name: sendmsg_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), [[C]](s32), [[COPY]](s32) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[COPY]](s32) %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 0 ; FIXME: Should not be a constant - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %1, %0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 ... --- @@ -27,11 +25,8 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: sendmsg_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), [[C]](s32), [[V_READFIRSTLANE_B32_]] + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[V_READFIRSTLANE_B32_]] %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 ; FIXME: Should not be a constant - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %1, %0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir index 1ece5a9259be7..77214b9bb04f0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.sendmsghalt.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s --- name: sendmsghalt_s @@ -11,11 +11,9 @@ body: | liveins: $sgpr0 ; CHECK-LABEL: name: sendmsghalt_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), [[C]](s32), [[COPY]](s32) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[COPY]](s32) %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CONSTANT i32 0 ; FIXME: Should not be a constant - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), %1, %0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0 ... 
--- @@ -27,11 +25,8 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: sendmsghalt_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), [[C]](s32), [[V_READFIRSTLANE_B32_]] + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, [[V_READFIRSTLANE_B32_]] %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CONSTANT i32 0 ; FIXME: Should not be a constant - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), %1, %0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsghalt), 0, %0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir index 6601a181d2494..45d809b786f11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir @@ -26,12 +26,8 @@ body: | bb.0: ; CHECK-LABEL: name: test_constant_s32_sgpr_use ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), [[C1]](s32), [[C]](s32) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, [[C]](s32) %0:_(s32) = G_CONSTANT i32 1 - - ; FIXME: Should not be a constant - %1:_(s32) = G_CONSTANT i32 0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %1, %0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 0, %0 ... From a5b9c756745ea88631277ab00c1b26f45f9d7e11 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:37:35 +0000 Subject: [PATCH 049/254] GlobalISel: Partially implement lower for G_EXTRACT Turn into shift and truncate. 
Doesn't yet handle pointers. llvm-svn: 373838 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 35 ++++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 +- .../GlobalISel/artifact-combiner-extract.mir | 14 +- .../legalize-extract-vector-elt.mir | 20 +- .../AMDGPU/GlobalISel/legalize-extract.mir | 183 +++++++++++++++++- .../GlobalISel/legalize-shuffle-vector.mir | 20 +- 7 files changed, 262 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index a5fab5aad1983..710a9f81d3ba2 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -230,6 +230,7 @@ class LegalizerHelper { LegalizeResult lowerUnmergeValues(MachineInstr &MI); LegalizeResult lowerShuffleVector(MachineInstr &MI); LegalizeResult lowerDynStackAlloc(MachineInstr &MI); + LegalizeResult lowerExtract(MachineInstr &MI); private: MachineRegisterInfo &MRI; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index be3e58e165029..861c22d605ab2 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2247,6 +2247,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerShuffleVector(MI); case G_DYN_STACKALLOC: return lowerDynStackAlloc(MI); + case G_EXTRACT: + return lowerExtract(MI); } } @@ -4099,3 +4101,36 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtract(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Offset = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy.isScalar() && + (SrcTy.isScalar() || + 
(SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { + LLT SrcIntTy = SrcTy; + if (!SrcTy.isScalar()) { + SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + } + + if (Offset == 0) + MIRBuilder.buildTrunc(Dst, Src); + else { + auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); + MIRBuilder.buildTrunc(Dst, Shr); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f61c9753d2aec..ff029e5491c8c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -48,6 +48,12 @@ static LegalityPredicate isMultiple32(unsigned TypeIdx, }; } +static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getSizeInBits() == Size; + }; +} + static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; @@ -903,7 +909,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1; // FIXME: Doesn't handle extract of illegal sizes. - getActionDefinitionsBuilder(Op) + auto &Builder = getActionDefinitionsBuilder(Op); + + // FIXME: Cleanup when G_INSERT lowering implemented. 
+ if (Op == G_EXTRACT) + Builder.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))); + + Builder .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir index 4202c4d1348ef..63aabbcbb1135 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -417,9 +417,9 @@ body: | ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1 @@ -437,9 +437,11 @@ body: | ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 16 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, 
%1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index c71010238750c..bb1592ba5ef0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -231,9 +231,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i16_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<2 x s16>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s16>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -417,9 +417,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -436,9 +436,11 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx1_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 16 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index 4e7ddafbbe308..b3a14ce947d97 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -56,9 +56,8 @@ body: | ; CHECK-LABEL: name: test_extract_s16_s31_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[TRUNC]](s32), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s31) = G_TRUNC %0 %2:_(s16) = G_EXTRACT %1, 0 @@ -929,3 +928,181 @@ body: | %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 ... + +--- +name: extract_s16_v2s16_offset0 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_v2s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: extract_s16_v2s16_offset1 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_v2s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 1 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_v2s16_offset8 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_v2s16_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 8 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_v2s16_offset16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_v2s16_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: extract_s16_s32_offset0 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_s32_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_s32_offset1 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_s32_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 1 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_s32_offset8 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_s32_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 8 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_s32_offset16 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_s32_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 16 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: extract_s16_p3_offset0 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_p3_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_s16_p3_offset1 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: extract_s16_p3_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](p3), 1 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s16) = G_EXTRACT %0, 1 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index be49cb6817bb6..2bd357c5b846f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -216,9 +216,13 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 16 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 @@ -239,9 +243,13 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 16 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 0 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[EXTRACT]](s16), [[EXTRACT1]](s16) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 From bcd6b1d209048036593255b672b6b9ac27ee3511 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:37:37 +0000 Subject: [PATCH 050/254] AMDGPU/GlobalISel: Lower G_ATOMIC_CMPXCHG_WITH_SUCCESS llvm-svn: 373839 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 + .../legalize-atomic-cmpxchg-with-success.mir | 107 ++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ff029e5491c8c..037e2e88a77ba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -837,6 
+837,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_ATOMICRMW_FADD) .legalFor({{S32, LocalPtr}}); + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .lower(); + // TODO: Pointer types, any 32-bit or 64-bit vector getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir new file mode 100644 index 0000000000000..b5fe7334e066a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir @@ -0,0 +1,107 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: test_atomic_cmpxchg_with_success_s32_global +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_global + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1) + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]] + ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = COPY $vgpr3 + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1) + S_ENDPGM 0, implicit %3, implicit %4 + +... 
+ +--- +name: test_atomic_cmpxchg_with_success_s32_flat +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_flat + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4) + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]] + ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = COPY $vgpr3 + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 0) + S_ENDPGM 0, implicit %3, implicit %4 + +... + +--- +name: test_atomic_cmpxchg_with_success_s32_local +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s32_local + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 3) + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]] + ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 3) + S_ENDPGM 0, implicit %3, implicit %4 + +... 
+ +--- +name: test_atomic_cmpxchg_with_success_s64_global +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s64_global + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1) + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]] + ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = COPY $vgpr4_vgpr5 + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1) + S_ENDPGM 0, implicit %3, implicit %4 + +... + +--- +name: test_atomic_cmpxchg_with_success_s64_local +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 + + ; CHECK-LABEL: name: test_atomic_cmpxchg_with_success_s64_local + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4 + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 3) + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]] + ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr1_vgpr2 + %2:_(s64) = COPY $vgpr3_vgpr4 + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 3) + S_ENDPGM 0, implicit %3, implicit %4 + +... 
From c0ec72d4f859fed86979ea63f708190cee3fc23e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:37:38 +0000 Subject: [PATCH 051/254] AMDGPU/GlobalISel: RegBankSelect DS GWS intrinsics llvm-svn: 373840 --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 35 ++++++++ .../regbankselect-amdgcn.ds.gws.init.mir | 79 +++++++++++++++++++ .../regbankselect-amdgcn.ds.gws.sema.v.mir | 37 +++++++++ 3 files changed, 151 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ad63532439cb4..13bc2980fe863 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1823,6 +1823,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + // Only the first lane is executes, so readfirstlane is safe. + substituteSimpleCopyRegs(OpdMapper, 1); + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + return; + } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // Only the first lane is executes, so readfirstlane is safe. + constrainOpWithReadfirstlane(MI, MRI, 1); // M0 + return; + } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // FIXME: Should this use a waterfall loop? 
@@ -2843,6 +2858,26 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + + // This must be an SGPR, but accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); + break; + } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // This must be an SGPR, but accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); + break; + } default: if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID)) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir new file mode 100644 index 0000000000000..ff851cf91476c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.init.mir @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ds_gws_init_s_s +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: ds_gws_init_s_s + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 +... + +--- +name: ds_gws_init_s_v +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: ds_gws_init_s_v + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 +... + +--- +name: ds_gws_init_v_s +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: ds_gws_init_v_s + ; CHECK: liveins: $vgpr0, $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 +... 
+ +--- +name: ds_gws_init_v_v +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: ds_gws_init_v_v + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir new file mode 100644 index 0000000000000..5695b13b44035 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.gws.sema.v.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ds_gws_init_s +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ds_gws_init_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0 +... 
+ +--- +name: ds_gws_init_v +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ds_gws_init_v + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $vgpr0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0 +... + From 786a3953baccc66f85e3a6c0053ce0f7d815de00 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:37:39 +0000 Subject: [PATCH 052/254] AMDGPU/GlobalISel: RegBankSelect mul24 intrinsics llvm-svn: 373841 --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 13bc2980fe863..36bb2aae0c557 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2575,6 +2575,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mbcnt_hi: case Intrinsic::amdgcn_ubfe: case Intrinsic::amdgcn_sbfe: + case Intrinsic::amdgcn_mul_u24: + case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_lerp: case Intrinsic::amdgcn_sad_u8: case Intrinsic::amdgcn_msad_u8: From e59296a05191cd7cb23f9c444e8b173e479b49d7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 6 Oct 2019 01:41:22 +0000 Subject: [PATCH 053/254] AMDGPU/GlobalISel: Fall back on weird G_EXTRACT offsets llvm-svn: 373842 --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3adbeabc61625..4148d1d0b5523 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -434,8 +434,11 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - assert(I.getOperand(2).getImm() % 32 == 0); - unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 32 != 0) + return false; + + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); const DebugLoc &DL = I.getDebugLoc(); MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), I.getOperand(0).getReg()) From c209598268b3c8923371c16c639c3d59dfd99690 Mon Sep 17 00:00:00 2001 From: Paul Hoad Date: Sun, 6 Oct 2019 09:37:58 +0000 Subject: [PATCH 054/254] [clang-format][docs] Fix the Google C++ and Chromium style guide URLs Summary: The Google C++ and Chromium style guides are broken in the clang-format docs. This patch updates them. Reviewers: djasper, MyDeveloperDay Reviewed By: MyDeveloperDay Subscribers: cfe-commits Tags: #clang Patch by: m4tx Differential Revision: https://reviews.llvm.org/D61256 llvm-svn: 373844 --- clang/docs/ClangFormatStyleOptions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 459805d312aca..3cd47d3c0ac08 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -141,7 +141,7 @@ the configuration (without a prefix: ``Auto``). 
`_ * ``Chromium`` A style complying with `Chromium's style guide - `_ + `_ * ``Mozilla`` A style complying with `Mozilla's style guide `_ From 7653ff398d28851b211e81fa6ab22dd94de16f92 Mon Sep 17 00:00:00 2001 From: David Zarzycki Date: Sun, 6 Oct 2019 10:25:52 +0000 Subject: [PATCH 055/254] [X86] Enable AVX512BW for memcmp() llvm-svn: 373845 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +- llvm/test/CodeGen/X86/memcmp.ll | 20 +++- llvm/test/CodeGen/X86/setcc-wide-types.ll | 118 +++++++++++++++------- 3 files changed, 106 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6f535617f1a76..e1e3a4ca4865a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42354,10 +42354,12 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, if ((OpSize == 128 && Subtarget.hasSSE2()) || (OpSize == 256 && Subtarget.hasAVX2()) || (OpSize == 512 && Subtarget.useAVX512Regs())) { - EVT VecVT = OpSize == 512 ? MVT::v16i32 : + auto BW = Subtarget.hasBWI(); + EVT VecVT = OpSize == 512 ? (BW ? MVT::v64i8 : MVT::v16i32) : OpSize == 256 ? MVT::v32i8 : MVT::v16i8; - EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT; + EVT CmpVT = OpSize == 512 ? (BW ? MVT::v64i1 : MVT::v16i1) : VecVT; + SDValue Cmp; if (IsOrXorXorCCZero) { // This is a bitwise-combined equality comparison of 2 pairs of vectors: @@ -42377,6 +42379,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); } // For 512-bits we want to emit a setcc that will lower to kortest. 
+ if (OpSize == 512 && BW) + return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i64, Cmp), + DAG.getConstant(0xFFFFFFFFFFFFFFFF, DL, MVT::i64), CC); if (OpSize == 512) return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp), DAG.getConstant(0xFFFF, DL, MVT::i16), CC); diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 54bd3fc2e80a4..0077df867db4d 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -6,7 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 @@ -1551,6 +1551,15 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind { ; X64-AVX512F-NEXT: setae %al ; X64-AVX512F-NEXT: vzeroupper ; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length64_eq: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0 +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setae %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind %cmp = icmp ne i32 %call, 0 ret i1 %cmp @@ -1612,6 +1621,15 @@ define i1 @length64_eq_const(i8* %X) nounwind { ; X64-AVX512F-NEXT: setb %al ; X64-AVX512F-NEXT: vzeroupper ; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: length64_eq_const: 
+; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0 +; X64-AVX512BW-NEXT: kortestq %k0, %k0 +; X64-AVX512BW-NEXT: setb %al +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind %c = icmp eq i32 %m, 0 ret i1 %c diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index d8176e488c1c2..58baea95fcd0b 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -319,14 +319,23 @@ define i32 @ne_i512(<8 x i64> %x, <8 x i64> %y) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: ne_i512: -; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 -; AVX512-NEXT: setae %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: ne_i512: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setae %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: ne_i512: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: setae %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %bcx = bitcast <8 x i64> %x to i512 %bcy = bitcast <8 x i64> %y to i512 %cmp = icmp ne i512 %bcx, %bcy @@ -464,14 +473,23 @@ define i32 @eq_i512(<8 x i64> %x, <8 x i64> %y) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: eq_i512: -; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: eq_i512: +; AVX512F: # %bb.0: +; AVX512F-NEXT: 
vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setb %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: eq_i512: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: setb %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %bcx = bitcast <8 x i64> %x to i512 %bcy = bitcast <8 x i64> %y to i512 %cmp = icmp eq i512 %bcx, %bcy @@ -804,17 +822,29 @@ define i32 @ne_i512_pair(i512* %a, i512* %b) { ; NO512-NEXT: setne %al ; NO512-NEXT: retq ; -; AVX512-LABEL: ne_i512_pair: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 -; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 -; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 -; AVX512-NEXT: setae %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: ne_i512_pair: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setae %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: ne_i512_pair: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1} +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: setae %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %a0 = load i512, i512* %a %b0 = load i512, i512* %b %xor1 = xor i512 %a0, %b0 @@ -886,17 +916,29 @@ define i32 @eq_i512_pair(i512* %a, i512* %b) { ; NO512-NEXT: sete %al ; NO512-NEXT: retq ; -; AVX512-LABEL: eq_i512_pair: 
-; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 -; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 -; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 -; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} -; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: eq_i512_pair: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 +; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 +; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1} +; AVX512F-NEXT: xorl %eax, %eax +; AVX512F-NEXT: kortestw %k0, %k0 +; AVX512F-NEXT: setb %al +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: eq_i512_pair: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1 +; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1 +; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1} +; AVX512BW-NEXT: xorl %eax, %eax +; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: setb %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %a0 = load i512, i512* %a %b0 = load i512, i512* %b %xor1 = xor i512 %a0, %b0 From 032dd9b086c77eb330453063135530cb7321ecbd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 12:38:38 +0000 Subject: [PATCH 056/254] [X86][SSE] matchVectorShuffleAsBlend - use Zeroable element mask directly. We can make use of the Zeroable mask to indicate which elements we can safely set to zero instead of creating a target shuffle mask on the fly. This allows us to remove createTargetShuffleMask. This is part of the work to fix PR43024 and allow us to use SimplifyDemandedElts to simplify shuffle chains - we need to get to a point where the target shuffle masks isn't adjusted by its source inputs in setTargetShuffleZeroElements but instead we cache them in a parallel Zeroable mask. 
llvm-svn: 373846 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 47 +++++++------------------ llvm/test/CodeGen/X86/packss.ll | 22 ++++++------ 2 files changed, 24 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e1e3a4ca4865a..e139d990c414e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10298,25 +10298,6 @@ static bool isTargetShuffleEquivalent(ArrayRef Mask, return true; } -// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle -// mask. -// TODO: Do we need this? It might be better to use Mask+Zeroable directly. -static SmallVector createTargetShuffleMask(ArrayRef Mask, - const APInt &Zeroable) { - int NumElts = Mask.size(); - assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes"); - - SmallVector TargetMask(NumElts, SM_SentinelUndef); - for (int i = 0; i != NumElts; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index"); - TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M); - } - return TargetMask; -} - // Attempt to create a shuffle mask from a VSELECT condition mask. 
static bool createShuffleMaskFromVSELECT(SmallVectorImpl &Mask, SDValue Cond) { @@ -10967,9 +10948,9 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SelectionDAG &DAG); static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, - MutableArrayRef TargetMask, - bool &ForceV1Zero, bool &ForceV2Zero, - uint64_t &BlendMask) { + MutableArrayRef Mask, + const APInt &Zeroable, bool &ForceV1Zero, + bool &ForceV2Zero, uint64_t &BlendMask) { bool V1IsZeroOrUndef = V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZeroOrUndef = @@ -10977,13 +10958,12 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask = 0; ForceV1Zero = false, ForceV2Zero = false; - assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask"); + assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. - // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. 
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { - int M = TargetMask[i]; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + int M = Mask[i]; if (M == SM_SentinelUndef) continue; if (M == i) @@ -10992,16 +10972,16 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask |= 1ull << i; continue; } - if (M == SM_SentinelZero) { + if (Zeroable[i]) { if (V1IsZeroOrUndef) { ForceV1Zero = true; - TargetMask[i] = i; + Mask[i] = i; continue; } if (V2IsZeroOrUndef) { ForceV2Zero = true; BlendMask |= 1ull << i; - TargetMask[i] = i + Size; + Mask[i] = i + Size; continue; } } @@ -11030,11 +11010,10 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SmallVector Mask = createTargetShuffleMask(Original, Zeroable); - uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; - if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero, + SmallVector Mask(Original.begin(), Original.end()); + if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero, BlendMask)) return SDValue(); @@ -32099,8 +32078,8 @@ static bool matchBinaryPermuteShuffle( uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; SmallVector TargetMask(Mask.begin(), Mask.end()); - if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero, - BlendMask)) { + if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero, + ForceV2Zero, BlendMask)) { if (MaskVT == MVT::v16i16) { // We can only use v16i16 PBLENDW if the lanes are repeated. 
SmallVector RepeatedMask; diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll index c0fa42e3c2bad..e3bd9d9e6ed12 100644 --- a/llvm/test/CodeGen/X86/packss.ll +++ b/llvm/test/CodeGen/X86/packss.ll @@ -356,18 +356,18 @@ define <32 x i8> @packsswb_icmp_zero_trunc_256(<16 x i16> %a0) { ; ; AVX1-LABEL: packsswb_icmp_zero_trunc_256: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1] -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = zero,zero,ymm0[0,1] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: ret{{[l|q]}} ; From c38881a6b7f9c1315c2d87654b9462195e409881 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Oct 2019 13:08:08 +0000 Subject: [PATCH 057/254] [InstCombine] don't assume 'inbounds' for bitcast pointer to GEP transform (PR43501) https://bugs.llvm.org/show_bug.cgi?id=43501 We can't declare a GEP 'inbounds' in general. But we may salvage that information if we have known dereferenceable bytes on the source pointer. 
Differential Revision: https://reviews.llvm.org/D68244 llvm-svn: 373847 --- clang/test/CodeGen/aapcs-bitfield.c | 72 +++++++++---------- .../CodeGenCXX/microsoft-abi-dynamic-cast.cpp | 16 ++--- .../test/CodeGenCXX/microsoft-abi-typeid.cpp | 2 +- .../InstCombine/InstCombineCasts.cpp | 11 ++- .../Transforms/InstCombine/addrspacecast.ll | 2 +- llvm/test/Transforms/InstCombine/cast.ll | 4 +- .../InstCombine/load-bitcast-vec.ll | 35 +++++++++ llvm/test/Transforms/InstCombine/memset.ll | 2 +- .../test/Transforms/InstCombine/unpack-fca.ll | 18 ++--- 9 files changed, 102 insertions(+), 60 deletions(-) diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c index ad7a73c91aabb..8d62f105a83f4 100644 --- a/clang/test/CodeGen/aapcs-bitfield.c +++ b/clang/test/CodeGen/aapcs-bitfield.c @@ -8,7 +8,7 @@ struct st0 { // LE-LABEL: @st0_check_load( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 // LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 @@ -17,7 +17,7 @@ struct st0 { // // BE-LABEL: @st0_check_load( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 @@ -29,7 +29,7 @@ int st0_check_load(struct st0 *m) { // LE-LABEL: @st0_check_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr 
[[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 // LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 // LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 @@ -38,7 +38,7 @@ int st0_check_load(struct st0 *m) { // // BE-LABEL: @st0_check_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 // BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 @@ -56,7 +56,7 @@ struct st1 { // LE-LABEL: @st1_check_load( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 // LE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 @@ -64,7 +64,7 @@ struct st1 { // // BE-LABEL: @st1_check_load( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 // BE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 @@ -77,7 +77,7 @@ int st1_check_load(struct st1 *m) { // LE-LABEL: @st1_check_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], 
align 4 // LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 @@ -86,7 +86,7 @@ int st1_check_load(struct st1 *m) { // // BE-LABEL: @st1_check_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 @@ -151,7 +151,7 @@ struct st3 { // LE-LABEL: @st3_check_load( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 // LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 @@ -160,7 +160,7 @@ struct st3 { // // BE-LABEL: @st3_check_load( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 @@ -172,7 +172,7 @@ int st3_check_load(struct st3 *m) { // LE-LABEL: @st3_check_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 // LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 // LE-NEXT: 
[[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 @@ -181,7 +181,7 @@ int st3_check_load(struct st3 *m) { // // BE-LABEL: @st3_check_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 // BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 @@ -199,7 +199,7 @@ struct st4 { // LE-LABEL: @st4_check_load( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2 // LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 @@ -210,7 +210,7 @@ struct st4 { // // BE-LABEL: @st4_check_load( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 @@ -225,7 +225,7 @@ int st4_check_load(struct st4 *m) { // LE-LABEL: @st4_check_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873 // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 @@ -234,7 +234,7 @@ int 
st4_check_load(struct st4 *m) { // // BE-LABEL: @st4_check_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125 // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4 @@ -247,7 +247,7 @@ void st4_check_store(struct st4 *m) { // LE-LABEL: @st4_check_nonv_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 @@ -256,7 +256,7 @@ void st4_check_store(struct st4 *m) { // // BE-LABEL: @st4_check_nonv_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 @@ -323,7 +323,7 @@ struct st6 { // LE-LABEL: @st6_check_load( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 @@ -342,7 +342,7 @@ struct st6 { // // BE-LABEL: @st6_check_load( // 
BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 @@ -366,7 +366,7 @@ int st6_check_load(struct st6 *m) { // LE-LABEL: @st6_check_store( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 @@ -382,7 +382,7 @@ int st6_check_load(struct st6 *m) { // // BE-LABEL: @st6_check_store( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 @@ -492,13 +492,13 @@ struct st8 { // LE-LABEL: @st8_check_assignment( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 // LE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // LE-NEXT: ret i32 65535 // // BE-LABEL: @st8_check_assignment( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 
0, i32 0 // BE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // BE-NEXT: ret i32 65535 // @@ -512,14 +512,14 @@ struct st9{ // LE-LABEL: @read_st9( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // LE-NEXT: ret i32 [[BF_CAST]] // // BE-LABEL: @read_st9( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] @@ -530,13 +530,13 @@ int read_st9(volatile struct st9 *m) { // LE-LABEL: @store_st9( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // LE-NEXT: ret void // // BE-LABEL: @store_st9( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // BE-NEXT: ret void // @@ -546,7 +546,7 @@ void store_st9(volatile struct st9 *m) { // LE-LABEL: @increment_st9( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: 
[[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 @@ -554,7 +554,7 @@ void store_st9(volatile struct st9 *m) { // // BE-LABEL: @increment_st9( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 @@ -571,7 +571,7 @@ struct st10{ // LE-LABEL: @read_st10( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7 // LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 @@ -580,7 +580,7 @@ struct st10{ // // BE-LABEL: @read_st10( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1 // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 @@ -593,7 +593,7 @@ int read_st10(volatile struct st10 *m) { // LE-LABEL: @store_st10( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: 
[[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511 // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2 @@ -602,7 +602,7 @@ int read_st10(volatile struct st10 *m) { // // BE-LABEL: @store_st10( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641 // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 @@ -615,7 +615,7 @@ void store_st10(volatile struct st10 *m) { // LE-LABEL: @increment_st10( // LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // LE-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2 @@ -627,7 +627,7 @@ void store_st10(volatile struct st10 *m) { // // BE-LABEL: @increment_st10( // BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 // BE-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128 diff --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp index 9567245d98519..c99df0e88b420 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp @@ -20,7 +20,7 @@ T* test1(V* x) { return &dynamic_cast(*x); } T* test2(A* 
x) { return &dynamic_cast(*x); } // CHECK-LABEL: define dso_local %struct.T* @"?test2@@YAPAUT@@PAUA@@@Z"(%struct.A* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.A* %x to i8* -// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr inbounds %struct.A, %struct.A* %x, i32 0, i32 0 +// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr %struct.A, %struct.A* %x, i32 0, i32 0 // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 @@ -31,7 +31,7 @@ T* test2(A* x) { return &dynamic_cast(*x); } T* test3(B* x) { return &dynamic_cast(*x); } // CHECK-LABEL: define dso_local %struct.T* @"?test3@@YAPAUT@@PAUB@@@Z"(%struct.B* %x) -// CHECK: [[VOIDP:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 +// CHECK: [[VOIDP:%.*]] = getelementptr %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 // CHECK-NEXT: [[VBPTR:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 4 // CHECK-NEXT: [[VBPTRPTR:%.*]] = bitcast i8* [[VBPTR:%.*]] to i32** // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 @@ -55,7 +55,7 @@ T* test5(A* x) { return dynamic_cast(x); } // CHECK: [[CHECK:%.*]] = icmp eq %struct.A* %x, null // CHECK-NEXT: br i1 [[CHECK]] // CHECK: [[VOIDP:%.*]] = bitcast %struct.A* %x to i8* -// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr inbounds %struct.A, %struct.A* %x, i32 0, i32 0 +// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr %struct.A, %struct.A* %x, i32 0, i32 0 // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 @@ -70,7 +70,7 @@ T* test6(B* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local %struct.T* @"?test6@@YAPAUT@@PAUB@@@Z"(%struct.B* %x) // CHECK: [[CHECK:%.*]] = icmp eq %struct.B* %x, null // CHECK-NEXT: br 
i1 [[CHECK]] -// CHECK: [[CAST:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 +// CHECK: [[CAST:%.*]] = getelementptr %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 // CHECK-NEXT: [[VBPTR:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 4 // CHECK-NEXT: [[VBPTRPTR:%.*]] = bitcast i8* [[VBPTR]] to i32** // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 @@ -78,7 +78,7 @@ T* test6(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -95,7 +95,7 @@ void* test8(A* x) { return dynamic_cast(x); } // CHECK: [[CHECK:%.*]] = icmp eq %struct.A* %x, null // CHECK-NEXT: br i1 [[CHECK]] // CHECK: [[VOIDP:%.*]] = bitcast %struct.A* %x to i8* -// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr inbounds %struct.A, %struct.A* %x, i32 0, i32 0 +// CHECK-NEXT: [[VBPTRPTR:%.*]] = getelementptr %struct.A, %struct.A* %x, i32 0, i32 0 // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 @@ -109,7 +109,7 @@ void* test9(B* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local i8* 
@"?test9@@YAPAXPAUB@@@Z"(%struct.B* %x) // CHECK: [[CHECK:%.*]] = icmp eq %struct.B* %x, null // CHECK-NEXT: br i1 [[CHECK]] -// CHECK: [[CAST:%.*]] = getelementptr inbounds %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 +// CHECK: [[CAST:%.*]] = getelementptr %struct.B, %struct.B* %x, i32 0, i32 0, i32 0 // CHECK-NEXT: [[VBPTR:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 4 // CHECK-NEXT: [[VBPTRPTR:%.*]] = bitcast i8* [[VBPTR]] to i32** // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBPTRPTR]], align 4 @@ -117,7 +117,7 @@ void* test9(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTCastToVoid(i8* [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp index 128f2710dfe6d..848e280cd9fe0 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp @@ -31,7 +31,7 @@ const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK: tail call i8* @__RTtypeid(i8* null) // CHECK-NEXT: unreachable // CHECK: [[THIS:%.*]] = bitcast %struct.A* [[CALL]] to i8* -// CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[CALL]], i32 0, i32 0 +// CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr %struct.A, %struct.A* [[CALL]], i32 0, i32 0 // CHECK-NEXT: [[VBTBL:%.*]] = load i32*, i32** [[VBTBLP]], align 4 // CHECK-NEXT: [[VBSLOT:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBASE_OFFS:%.*]] = load i32, i32* [[VBSLOT]], align 4 diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 74c69808f1585..c58e63d08e31c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2338,8 +2338,15 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector Idxs(NumZeros + 1, Builder.getInt32(0)); - return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src, - Idxs); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs); + + // If the source pointer is dereferenceable, then assume it points to an + // allocated object and apply "inbounds" to the GEP. + bool CanBeNull; + if (Src->getPointerDereferenceableBytes(DL, CanBeNull)) + GEP->setIsInBounds(); + return GEP; } } diff --git a/llvm/test/Transforms/InstCombine/addrspacecast.ll b/llvm/test/Transforms/InstCombine/addrspacecast.ll index 6caefb166dbf5..2e34f61a66235 100644 --- a/llvm/test/Transforms/InstCombine/addrspacecast.ll +++ b/llvm/test/Transforms/InstCombine/addrspacecast.ll @@ -104,7 +104,7 @@ define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 ad define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) { ; CHECK-LABEL: @canonicalize_addrspacecast( -; CHECK-NEXT: getelementptr inbounds [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0 +; CHECK-NEXT: getelementptr [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0 ; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32* ; CHECK-NEXT: load i32, i32* ; CHECK-NEXT: ret i32 diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index b6d1eda0601dd..fd35bd92dd7dc 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -293,7 +293,7 @@ define i32 @test26(float %F) { define [4 x float]* @test27([9 x [4 x float]]* %A) 
{ ; CHECK-LABEL: @test27( -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [9 x [4 x float]], [9 x [4 x float]]* [[A:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[C:%.*]] = getelementptr [9 x [4 x float]], [9 x [4 x float]]* [[A:%.*]], i64 0, i64 0 ; CHECK-NEXT: ret [4 x float]* [[C]] ; %c = bitcast [9 x [4 x float]]* %A to [4 x float]* @@ -302,7 +302,7 @@ define [4 x float]* @test27([9 x [4 x float]]* %A) { define float* @test28([4 x float]* %A) { ; CHECK-LABEL: @test28( -; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[A:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[C:%.*]] = getelementptr [4 x float], [4 x float]* [[A:%.*]], i64 0, i64 0 ; CHECK-NEXT: ret float* [[C]] ; %c = bitcast [4 x float]* %A to float* diff --git a/llvm/test/Transforms/InstCombine/load-bitcast-vec.ll b/llvm/test/Transforms/InstCombine/load-bitcast-vec.ll index e6540ee70611f..cb1b224e06091 100644 --- a/llvm/test/Transforms/InstCombine/load-bitcast-vec.ll +++ b/llvm/test/Transforms/InstCombine/load-bitcast-vec.ll @@ -67,6 +67,41 @@ define float @matching_scalar_small_deref(<4 x float>* dereferenceable(15) %p) { ret float %r } +define float @matching_scalar_smallest_deref(<4 x float>* dereferenceable(1) %p) { +; CHECK-LABEL: @matching_scalar_smallest_deref( +; CHECK-NEXT: [[BC:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: ret float [[R]] +; + %bc = bitcast <4 x float>* %p to float* + %r = load float, float* %bc, align 16 + ret float %r +} + +define float @matching_scalar_smallest_deref_or_null(<4 x float>* dereferenceable_or_null(1) %p) { +; CHECK-LABEL: @matching_scalar_smallest_deref_or_null( +; CHECK-NEXT: [[BC:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[BC]], align 16 +; CHECK-NEXT: ret float [[R]] +; + %bc = bitcast <4 x float>* %p to float* + %r = load float, float* %bc, align 16 + 
ret float %r +} + +; TODO: Is a null pointer inbounds in any address space? + +define float @matching_scalar_smallest_deref_or_null_addrspace(<4 x float> addrspace(4)* dereferenceable_or_null(1) %p) { +; CHECK-LABEL: @matching_scalar_smallest_deref_or_null_addrspace( +; CHECK-NEXT: [[BC:%.*]] = getelementptr inbounds <4 x float>, <4 x float> addrspace(4)* [[P:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, float addrspace(4)* [[BC]], align 16 +; CHECK-NEXT: ret float [[R]] +; + %bc = bitcast <4 x float> addrspace(4)* %p to float addrspace(4)* + %r = load float, float addrspace(4)* %bc, align 16 + ret float %r +} + define float @matching_scalar_volatile(<4 x float>* dereferenceable(16) %p) { ; CHECK-LABEL: @matching_scalar_volatile( ; CHECK-NEXT: [[BC:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 diff --git a/llvm/test/Transforms/InstCombine/memset.ll b/llvm/test/Transforms/InstCombine/memset.ll index 7d531f2965d0d..b994d97c7a257 100644 --- a/llvm/test/Transforms/InstCombine/memset.ll +++ b/llvm/test/Transforms/InstCombine/memset.ll @@ -3,7 +3,7 @@ define i32 @test([1024 x i8]* %target) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[TARGET:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [1024 x i8], [1024 x i8]* [[TARGET:%.*]], i64 0, i64 0 ; CHECK-NEXT: store i8 1, i8* [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i16* ; CHECK-NEXT: store i16 257, i16* [[TMP2]], align 2 diff --git a/llvm/test/Transforms/InstCombine/unpack-fca.ll b/llvm/test/Transforms/InstCombine/unpack-fca.ll index 3c5e4177d69f9..1bfd53f40322a 100644 --- a/llvm/test/Transforms/InstCombine/unpack-fca.ll +++ b/llvm/test/Transforms/InstCombine/unpack-fca.ll @@ -13,7 +13,7 @@ declare i32 @A.foo(%A* nocapture %this) define void @storeA(%A* %a.ptr) { ; CHECK-LABEL: storeA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, 
i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr %A, %A* %a.ptr, i64 0, i32 0 ; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: ret void store %A { %A__vtbl* @A__vtblZ }, %A* %a.ptr, align 8 @@ -33,7 +33,7 @@ define void @storeB(%B* %b.ptr) { define void @storeStructOfA({ %A }* %sa.ptr) { ; CHECK-LABEL: storeStructOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 ; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: ret void store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8 @@ -42,7 +42,7 @@ define void @storeStructOfA({ %A }* %sa.ptr) { define void @storeArrayOfA([1 x %A]* %aa.ptr) { ; CHECK-LABEL: storeArrayOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0 ; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: ret void store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %aa.ptr, align 8 @@ -60,7 +60,7 @@ define void @storeLargeArrayOfA([2000 x %A]* %aa.ptr) { define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) { ; CHECK-LABEL: storeStructOfArrayOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0 ; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: ret void store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %saa.ptr, align 8 @@ -90,7 +90,7 @@ define void @storeArrayOfB([2 x %B]* %ab.ptr, [2 x %B] %ab) { define %A @loadA(%A* %a.ptr) { ; CHECK-LABEL: loadA -; CHECK-NEXT: 
[[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr %A, %A* %a.ptr, i64 0, i32 0 ; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: [[IV:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0 ; CHECK-NEXT: ret %A [[IV]] @@ -113,7 +113,7 @@ define %B @loadB(%B* %b.ptr) { define { %A } @loadStructOfA({ %A }* %sa.ptr) { ; CHECK-LABEL: loadStructOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 ; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0 ; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue { %A } undef, %A [[IV1]], 0 @@ -124,7 +124,7 @@ define { %A } @loadStructOfA({ %A }* %sa.ptr) { define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) { ; CHECK-LABEL: loadArrayOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0 ; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0 ; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0 @@ -135,7 +135,7 @@ define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) { define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) { ; CHECK-LABEL: loadStructOfArrayOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0 ; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** 
[[GEP]], align 8 ; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0 ; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0 @@ -147,7 +147,7 @@ define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) { define { %A } @structOfA({ %A }* %sa.ptr) { ; CHECK-LABEL: structOfA -; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 +; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0 ; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8 ; CHECK-NEXT: ret { %A } { %A { %A__vtbl* @A__vtblZ } } store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8 From 61c22a83dee717a8a3f99aed3e66680da83507c6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Oct 2019 13:19:05 +0000 Subject: [PATCH 058/254] [InstCombine] add fast-math-flags for better test coverage; NFC llvm-svn: 373848 --- llvm/test/Transforms/InstCombine/fmul.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index 5ab6d93c834b8..1bcca95d04536 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -1006,11 +1006,11 @@ define float @negate_if_true(float %x, i1 %cond) { define float @negate_if_false(float %x, i1 %cond) { ; CHECK-LABEL: @negate_if_false( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float -1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = fmul arcp float [[SEL]], [[X:%.*]] ; CHECK-NEXT: ret float [[R]] ; %sel = select i1 %cond, float 1.0, float -1.0 - %r = fmul float %sel, %x + %r = fmul arcp float %sel, %x ret float %r } @@ -1018,12 +1018,12 @@ define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( ; CHECK-NEXT: [[X:%.*]] = fdiv <2 
x double> , [[PX:%.*]] ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> -; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] +; CHECK-NEXT: [[R:%.*]] = fmul ninf <2 x double> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x double> [[R]] ; %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization %sel = select i1 %cond, <2 x double> , <2 x double> - %r = fmul <2 x double> %x, %sel + %r = fmul ninf <2 x double> %x, %sel ret <2 x double> %r } From 2dee7e55610dc49d810c3e55f33bd3a36576c6a5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 13:25:10 +0000 Subject: [PATCH 059/254] [X86][AVX] combineExtractSubvector - merge duplicate variables. NFCI. llvm-svn: 373849 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 35 ++++++++++++------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e139d990c414e..ca770faad133d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44176,12 +44176,15 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, return SDValue(); MVT VT = N->getSimpleValueType(0); - EVT WideVecVT = N->getOperand(0).getValueType(); - SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); + SDValue InVec = N->getOperand(0); + SDValue InVecBC = peekThroughBitcasts(InVec); + EVT InVecVT = InVec.getValueType(); + EVT InVecBCVT = InVecBC.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && - TLI.isTypeLegal(WideVecVT) && - WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { + TLI.isTypeLegal(InVecVT) && + InVecVT.getSizeInBits() == 256 && InVecBC.getOpcode() == ISD::AND) { auto isConcatenatedNot = [] (SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) @@ -44189,12 +44192,12 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG 
&DAG, SDValue NotOp = V->getOperand(0); return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; }; - if (isConcatenatedNot(WideVec.getOperand(0)) || - isConcatenatedNot(WideVec.getOperand(1))) { + if (isConcatenatedNot(InVecBC.getOperand(0)) || + isConcatenatedNot(InVecBC.getOperand(1))) { // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1 - SDValue Concat = split256IntArith(WideVec, DAG); + SDValue Concat = split256IntArith(InVecBC, DAG); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, - DAG.getBitcast(WideVecVT, Concat), N->getOperand(1)); + DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); } } @@ -44204,7 +44207,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowExtractedVectorSelect(N, DAG)) return V; - SDValue InVec = N->getOperand(0); unsigned IdxVal = cast(N->getOperand(1))->getZExtValue(); if (ISD::isBuildVectorAllZeros(InVec.getNode())) @@ -44224,25 +44226,22 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR - if (InVec.getOpcode() == ISD::BITCAST && - InVec.getOperand(0).getValueType().isVector()) { - SDValue SrcOp = InVec.getOperand(0); - EVT SrcVT = SrcOp.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - unsigned DestNumElts = InVec.getValueType().getVectorNumElements(); + if (InVec != InVecBC && InVecBCVT.isVector()) { + unsigned SrcNumElts = InVecBCVT.getVectorNumElements(); + unsigned DestNumElts = InVecVT.getVectorNumElements(); if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; if ((VT.getVectorNumElements() % DestSrcRatio) == 0) { unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), 
NewExtNumElts); + InVecBCVT.getScalarType(), NewExtNumElts); if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; SDLoc DL(N); SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, - SrcOp, NewIndex); + InVecBC, NewIndex); return DAG.getBitcast(VT, NewExtract); } } @@ -44288,7 +44287,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // we may be able to perform this with a smaller vector width. if (IdxVal == 0 && InVec.hasOneUse()) { unsigned InOpcode = InVec.getOpcode(); - if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) { + if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { // v2f64 CVTDQ2PD(v4i32). if (InOpcode == ISD::SINT_TO_FP && InVec.getOperand(0).getValueType() == MVT::v4i32) { From 25ba49824d2d4f2347b4a7cb1623600a76ce9433 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Sun, 6 Oct 2019 14:14:55 +0000 Subject: [PATCH 060/254] [DAGCombine] Match more patterns for half word bswap Summary: It ensures that the bswap is generated even when a part of the subtree already matches a bswap transform. 
Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68250 llvm-svn: 373850 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 +++++++++---------- llvm/test/CodeGen/X86/bswap_tree.ll | 21 +------ 2 files changed, 32 insertions(+), 47 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 38fd9742d2d3e..ea73ff865bf07 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5517,6 +5517,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { return true; } +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5534,43 +5551,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) - return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - } else { + } else if (N0.getOpcode() != ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. 
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) diff --git a/llvm/test/CodeGen/X86/bswap_tree.ll b/llvm/test/CodeGen/X86/bswap_tree.ll index 79a45050b98f0..b136263b179e7 100644 --- a/llvm/test/CodeGen/X86/bswap_tree.ll +++ b/llvm/test/CodeGen/X86/bswap_tree.ll @@ -79,30 +79,15 @@ define i32 @test3(i32 %x) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $-16777216, %edx # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: shrl $8, %edx -; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: roll $16, %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: test3: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: addl %ecx, %eax -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte2 = and i32 %x, 16711680 ; 0x00ff0000 %byte3 = and i32 %x, 4278190080 ; 0xff000000 From aab8b3ab9cf63d23775930414e9153156ac9fbcf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Oct 2019 14:15:48 +0000 Subject: [PATCH 061/254] [InstCombine] fold fneg disguised as select+fmul (PR43497) Extends rL373230 and solves the motivating bug (although in a narrow way): https://bugs.llvm.org/show_bug.cgi?id=43497 llvm-svn: 373851 --- .../InstCombine/InstCombineMulDivRem.cpp | 67 ++++++++++++++----- llvm/test/Transforms/InstCombine/fmul.ll | 28 ++++---- 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index a301d9eef60e8..0b9128a9f5a1c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -124,6 +124,50 @@ static Constant *getLogBase2(Type *Ty, Constant *C) { return ConstantVector::get(Elts); } +// TODO: This is a specific form of a much more general pattern. +// We could detect a select with any binop identity constant, or we +// could use SimplifyBinOp to see if either arm of the select reduces. +// But that needs to be done carefully and/or while removing potential +// reverse canonicalizations as in InstCombiner::foldSelectIntoOp(). +static Value *foldMulSelectToNegate(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *Cond, *OtherOp; + + // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp + // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp)); + + // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp + // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp); + + // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp + // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0), + m_SpecificFP(-1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateFNeg(OtherOp)); + } + + // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, 
OtherOp + // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0), + m_SpecificFP(1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, Builder.CreateFNeg(OtherOp), OtherOp); + } + + return nullptr; +} + Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) @@ -213,24 +257,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; - // TODO: This is a specific form of a much more general pattern. - // We could detect a select with any binop identity constant, or we - // could use SimplifyBinOp to see if either arm of the select reduces. - // But that needs to be done carefully and/or while removing potential - // reverse canonicalizations as in InstCombiner::foldSelectIntoOp(). 
- // mul (select Cond, 1, -1), Op1 --> select Cond, Op1, -Op1 - // mul (select Cond, -1, 1), Op1 --> select Cond, -Op1, Op1 - // mul Op0, (select Cond, 1, -1) --> select Cond, Op0, -Op0 - // mul Op0, (select Cond, -1, 1) --> select Cond, -Op0, Op0 - Value *Cond; - if (match(Op0, m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())))) - return SelectInst::Create(Cond, Op1, Builder.CreateNeg(Op1)); - if (match(Op0, m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())))) - return SelectInst::Create(Cond, Builder.CreateNeg(Op1), Op1); - if (match(Op1, m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())))) - return SelectInst::Create(Cond, Op0, Builder.CreateNeg(Op0)); - if (match(Op1, m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())))) - return SelectInst::Create(Cond, Builder.CreateNeg(Op0), Op0); + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); // Simplify mul instructions with a constant RHS. if (isa(Op1)) { @@ -377,6 +405,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); + // X * -1.0 --> -X Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (match(Op1, m_SpecificFP(-1.0))) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index 1bcca95d04536..89c957b9d083b 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -994,9 +994,9 @@ define double @fmul_negated_constant_expression(double %x) { define float @negate_if_true(float %x, i1 %cond) { ; CHECK-LABEL: @negate_if_true( -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, 
[[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND:%.*]], float [[TMP1]], float [[X]] +; CHECK-NEXT: ret float [[TMP2]] ; %sel = select i1 %cond, float -1.0, float 1.0 %r = fmul float %sel, %x @@ -1005,9 +1005,9 @@ define float @negate_if_true(float %x, i1 %cond) { define float @negate_if_false(float %x, i1 %cond) { ; CHECK-LABEL: @negate_if_false( -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float -1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = fmul arcp float [[SEL]], [[X:%.*]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub arcp float -0.000000e+00, [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select arcp i1 [[COND:%.*]], float [[X]], float [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] ; %sel = select i1 %cond, float 1.0, float -1.0 %r = fmul arcp float %sel, %x @@ -1017,9 +1017,9 @@ define float @negate_if_false(float %x, i1 %cond) { define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( ; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> -; CHECK-NEXT: [[R:%.*]] = fmul ninf <2 x double> [[X]], [[SEL]] -; CHECK-NEXT: ret <2 x double> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub ninf <2 x double> , [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = select ninf i1 [[COND:%.*]], <2 x double> [[TMP1]], <2 x double> [[X]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization %sel = select i1 %cond, <2 x double> , <2 x double> @@ -1030,9 +1030,9 @@ define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { define <2 x double> @negate_if_false_commute(<2 x double> %px, <2 x i1> %cond) { ; CHECK-LABEL: @negate_if_false_commute( ; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> , <2 x double> -; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] -; 
CHECK-NEXT: ret <2 x double> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> , [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[X]], <2 x double> [[TMP1]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization %sel = select <2 x i1> %cond, <2 x double> , <2 x double> @@ -1040,6 +1040,8 @@ define <2 x double> @negate_if_false_commute(<2 x double> %px, <2 x i1> %cond) { ret <2 x double> %r } +; Negative test + define float @negate_if_true_extra_use(float %x, i1 %cond) { ; CHECK-LABEL: @negate_if_true_extra_use( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 1.000000e+00 @@ -1053,6 +1055,8 @@ define float @negate_if_true_extra_use(float %x, i1 %cond) { ret float %r } +; Negative test + define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( ; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] From ee68f1ec67c73a89aa4549356a1dca31a71247c8 Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Sun, 6 Oct 2019 14:44:22 +0000 Subject: [PATCH 062/254] [NFC] Replace 'isDarwin' with 'IsDarwin' Summary: Replace 'isDarwin' with 'IsDarwin' based on LLVM naming convention. 
Differential Revision: https://reviews.llvm.org/D68336 llvm-svn: 373852 --- .../Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 4 ++-- .../Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 14 ++++++------ llvm/lib/Target/PowerPC/PPC.h | 4 ++-- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 20 ++++++++--------- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 6 ++--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++--- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 22 +++++++++---------- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index d467f5c4a4392..fb9dd5d7aa758 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -19,8 +19,8 @@ using namespace llvm; const PPCMCExpr* PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); + bool IsDarwin, MCContext &Ctx) { + return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin); } void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 449e2c34f74df..ad1454566162a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -45,21 +45,21 @@ class PPCMCExpr : public MCTargetExpr { /// @{ static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx); + bool IsDarwin, MCContext &Ctx); static const PPCMCExpr *createLo(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_LO, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_LO, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHi(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HI, Expr, isDarwin, Ctx); + bool 
IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HI, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHa(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HA, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HA, Expr, IsDarwin, Ctx); } /// @} diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 667eb91059990..0534773c4c9ed 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -50,10 +50,10 @@ namespace llvm { FunctionPass *createPPCExpandISELPass(); FunctionPass *createPPCPreEmitPeepholePass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin); + AsmPrinter &AP, bool IsDarwin); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin); + bool IsDarwin); void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index db30e16d15478..b9e52a11274f0 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -517,7 +517,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, /// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; - bool isDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsDarwin = TM.getTargetTriple().isOSDarwin(); const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); @@ -604,7 +604,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha // addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l // Get the offset from the GOT Base Register to the GOT - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); if (Subtarget->isSecurePlt() && isPositionIndependent() ) { 
unsigned PICR = TmpInst.getOperand(0).getReg(); MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol( @@ -655,10 +655,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } case PPC::LWZtoc: { - assert(!isDarwin && "TOC is an ELF/XCOFF construct."); + assert(!IsDarwin && "TOC is an ELF/XCOFF construct."); // Transform %rN = LWZtoc @op1, %r2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LWZ. TmpInst.setOpcode(PPC::LWZ); @@ -724,7 +724,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocBA: case PPC::LDtoc: { // Transform %x3 = LDtoc @min1, %x2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. @@ -755,7 +755,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStocHA8: { // Transform %xd = ADDIStocHA8 %x2, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to ADDIS8. If the global address is external, has // common linkage, is a non-local function address, or is a jump table @@ -801,7 +801,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDtocL: { // Transform %xd = LDtocL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD. 
If the global address is external, has // common linkage, or is a jump table address, then reference the @@ -843,7 +843,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItocL: { // Transform %xd = ADDItocL %xs, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the @@ -888,7 +888,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { // Transform %xd = LDgotTprelL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD. TmpInst.setOpcode(Subtarget->isPPC64() ? PPC::LD : PPC::LWZ); @@ -1130,7 +1130,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); EmitToStreamer(*OutStreamer, TmpInst); } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 8306eb679dd84..06a4d183e7819 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1776,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); - bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + const bool isPPC64 = Subtarget.isPPC64(); + const bool IsDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo &MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. 
@@ -1826,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. - if (!isPPC64 && !isDarwinABI && + if (!isPPC64 && !IsDarwinABI && (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || SavedRegs.test(PPC::CR4))) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8d8ffc1199f35..8cf6a660b08bd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14516,7 +14516,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + bool IsDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) @@ -14525,8 +14525,8 @@ Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, bool is64Bit = isPPC64 && VT == MVT::i64; Register Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) - .Case("r2", (isDarwinABI || isPPC64) ? Register() : PPC::R2) - .Case("r13", (!isPPC64 && isDarwinABI) ? Register() : + .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2) + .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() : (is64Bit ? 
PPC::X13 : PPC::R13)) .Default(Register()); diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 32b9818e70b4a..b6496f189a3ae 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer, bool isDarwin) { + AsmPrinter &Printer, bool IsDarwin) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required. switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx); break; } @@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin) { + AsmPrinter &AP, bool IsDarwin) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MCOperand MCOp; if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, - isDarwin)) + IsDarwin)) OutMI.addOperand(MCOp); } } bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin) { + bool IsDarwin) { switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); @@ -181,20 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, return true; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), 
AP, isDarwin); + OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin); return true; case MachineOperand::MO_JumpTableIndex: - OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_ConstantPoolIndex: - OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_BlockAddress: OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, - isDarwin); + IsDarwin); return true; case MachineOperand::MO_MCSymbol: - OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, isDarwin); + OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin); return true; case MachineOperand::MO_RegisterMask: return false; From f643fabb525f797f574600d20b46c7aa0bd6c1ee Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Oct 2019 15:27:34 +0000 Subject: [PATCH 063/254] Revert [DAGCombine] Match more patterns for half word bswap This reverts r373850 (git commit 25ba49824d2d4f2347b4a7cb1623600a76ce9433) This patch appears to cause multiple codegen regression test failures - http://lab.llvm.org:8011/builders/clang-cmake-armv7-quick/builds/10680 llvm-svn: 373853 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 +++++++++---------- llvm/test/CodeGen/X86/bswap_tree.ll | 21 ++++++- 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ea73ff865bf07..38fd9742d2d3e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5517,23 +5517,6 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { return true; } -// Match 2 elements of a packed halfword bswap. 
-static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { - if (N.getOpcode() == ISD::OR) - return isBSwapHWordElement(N.getOperand(0), Parts) && - isBSwapHWordElement(N.getOperand(1), Parts); - - if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { - ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); - if (!C || C->getAPIntValue() != 16) - return false; - Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); - return true; - } - - return false; -} - /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5551,26 +5534,43 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (bswaphpair), (bswaphpair)) - // (or (or (bswaphpair), (and)), (and)) - // (or (or (and), (bswaphpair)), (and)) + // (or (or (and), (and)), (or (and), (and))) + // (or (or (or (and), (and)), (and)), (and)) + if (N0.getOpcode() != ISD::OR) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); SDNode *Parts[4] = {}; - if (isBSwapHWordPair(N0, Parts)) { + if (N1.getOpcode() == ISD::OR && + N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordPair(N1, Parts)) + if (!isBSwapHWordElement(N00, Parts)) + return SDValue(); + + if (!isBSwapHWordElement(N01, Parts)) + return SDValue(); + SDValue N10 = N1.getOperand(0); + if (!isBSwapHWordElement(N10, Parts)) + return SDValue(); + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); - } else if (N0.getOpcode() != ISD::OR) { + } else { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && - !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) + 
if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - } else - return SDValue(); + if (N00.getOpcode() != ISD::OR) + return SDValue(); + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + } // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) diff --git a/llvm/test/CodeGen/X86/bswap_tree.ll b/llvm/test/CodeGen/X86/bswap_tree.ll index b136263b179e7..79a45050b98f0 100644 --- a/llvm/test/CodeGen/X86/bswap_tree.ll +++ b/llvm/test/CodeGen/X86/bswap_tree.ll @@ -79,15 +79,30 @@ define i32 @test3(i32 %x) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000 +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $-16777216, %edx # imm = 0xFF000000 +; CHECK-NEXT: shll $8, %ecx +; CHECK-NEXT: shrl $8, %edx +; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: roll $16, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: test3: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: bswapl %eax -; CHECK64-NEXT: roll $16, %eax +; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 +; CHECK64-NEXT: movl %edi, %ecx +; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 +; CHECK64-NEXT: shll $8, %eax +; CHECK64-NEXT: shrl $8, %ecx +; CHECK64-NEXT: addl %ecx, %eax +; CHECK64-NEXT: bswapl %edi +; CHECK64-NEXT: shrl $16, %edi +; CHECK64-NEXT: orl %edi, %eax ; CHECK64-NEXT: retq %byte2 = and i32 %x, 16711680 ; 0x00ff0000 %byte3 = and i32 %x, 4278190080 ; 0xff000000 From 6d1965141049049579ef5f30a38dd0ffe3549f85 Mon Sep 17 00:00:00 2001 From: DeForest Richards Date: Sun, 6 Oct 2019 15:36:37 +0000 Subject: [PATCH 064/254] [Docs] Adds new Getting Started/Tutorials 
page Adds a new page for Getting Started/Tutorials topics. Also updates existing topic categories on the User Guides and Reference pages. llvm-svn: 373854 --- llvm/docs/GettingStartedTutorials.rst | 29 ++++++++ llvm/docs/ProgrammingDocumentation.rst | 23 +------ llvm/docs/Reference.rst | 91 ++++++++++++++++++------ llvm/docs/SubsystemDocumentation.rst | 46 +------------ llvm/docs/UserGuides.rst | 95 ++++++++++++++------------ llvm/docs/index.rst | 31 ++------- 6 files changed, 160 insertions(+), 155 deletions(-) create mode 100644 llvm/docs/GettingStartedTutorials.rst diff --git a/llvm/docs/GettingStartedTutorials.rst b/llvm/docs/GettingStartedTutorials.rst new file mode 100644 index 0000000000000..60a5ddf7c06da --- /dev/null +++ b/llvm/docs/GettingStartedTutorials.rst @@ -0,0 +1,29 @@ +Getting Started/Tutorials +========================= + +For those new to the LLVM system. + +.. toctree:: + :hidden: + + GettingStarted + GettingStartedVS + Frontend/PerformanceTips + tutorial/index + +:doc:`GettingStarted` + Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. + +:doc:`tutorial/index` + Tutorials about using LLVM. Includes a tutorial about making a custom + language with LLVM. + +:doc:`GettingStartedVS` + An addendum to the main Getting Started guide for those using Visual Studio + on Windows. + +:doc:`Frontend/PerformanceTips` + A collection of tips for frontend authors on how to generate IR + which LLVM is able to effectively optimize. \ No newline at end of file diff --git a/llvm/docs/ProgrammingDocumentation.rst b/llvm/docs/ProgrammingDocumentation.rst index 6a4d7aa25ba78..a36127a87dd93 100644 --- a/llvm/docs/ProgrammingDocumentation.rst +++ b/llvm/docs/ProgrammingDocumentation.rst @@ -8,13 +8,10 @@ For developers of applications which use LLVM as a library. 
Atomics CommandLine - CommandGuide/index ExtendingLLVM HowToSetUpLLVMStyleRTTI ProgrammersManual Extensions - LibFuzzer - FuzzingLLVM ScudoHardenedAllocator OptBisect GwpAsan @@ -42,26 +39,8 @@ For developers of applications which use LLVM as a library. :doc:`GwpAsan` A sampled heap memory error detection toolkit designed for production use. -============ -Command Line -============ - :doc:`CommandLine` Provides information on using the command line parsing library. :doc:`OptBisect` - A command line option for debugging optimization-induced failures. - -:doc:`LLVM Command Guide ` - A reference manual for the LLVM command line utilities ("man" pages for LLVM - tools). - -========= -LibFuzzer -========= - -:doc:`LibFuzzer` - A library for writing in-process guided fuzzers. - -:doc:`FuzzingLLVM` - Information on writing and using Fuzzers to find bugs in LLVM. \ No newline at end of file + A command line option for debugging optimization-induced failures. \ No newline at end of file diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index 49ff6b6e0e2db..9346d4d7608f7 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -3,27 +3,41 @@ Reference LLVM and API reference documentation. +.. contents:: + :local: + .. toctree:: :hidden: - LangRef - TestingGuide + Bugpoint + CommandGuide/index CompilerWriterInfo + FuzzingLLVM + GarbageCollection + GetElementPtr + LangRef + LibFuzzer MIRLangRef - NVPTXUsage - AMDGPUUsage + PDB/index + Statepoints + TestingGuide + YamlIO + +API Reference +------------- + +`Doxygen generated documentation `_ + (`classes `_) + +`Documentation for Go bindings `_ -============== LLVM Reference -============== +-------------- :doc:`LLVM Language Reference Manual ` Defines the LLVM intermediate representation and the assembly form of the different nodes. -:doc:`LLVM Testing Infrastructure Guide ` - A reference manual for using the LLVM testing infrastructure. 
- :doc:`CompilerWriterInfo` A list of helpful links for compiler writers. @@ -31,17 +45,56 @@ LLVM Reference A reference manual for the MIR serialization format, which is used to test LLVM's code generation passes. -:doc:`NVPTXUsage` - This document describes using the NVPTX backend to compile GPU kernels. +:doc:`YamlIO` + A reference guide for using LLVM's YAML I/O library. -:doc:`AMDGPUUsage` - This document describes using the AMDGPU backend to compile GPU kernels. +:doc:`GetElementPtr` + Answers to some very frequent questions about LLVM's most frequently + misunderstood instruction. -============= -API Reference -============= +====================== +Command Line Utilities +====================== -`Doxygen generated documentation `_ - (`classes `_) +:doc:`LLVM Command Guide ` + A reference manual for the LLVM command line utilities ("man" pages for LLVM + tools). -`Documentation for Go bindings `_ +:doc:`Bugpoint` + Automatic bug finder and test-case reducer description and usage + information. + +:doc:`The Microsoft PDB File Format ` + A detailed description of the Microsoft PDB (Program Database) file format. + +================== +Garbage Collection +================== + +:doc:`GarbageCollection` + The interfaces source-language compilers should use for compiling GC'd + programs. + +:doc:`Statepoints` + This describes a set of experimental extensions for garbage + collection support. + +========= +LibFuzzer +========= + +:doc:`LibFuzzer` + A library for writing in-process guided fuzzers. + +:doc:`FuzzingLLVM` + Information on writing and using Fuzzers to find bugs in LLVM. + +======= +Testing +======= + +:doc:`LLVM Testing Infrastructure Guide ` + A reference manual for using the LLVM testing infrastructure. + +:doc:`TestSuiteGuide` + Describes how to compile and run the test-suite benchmarks. 
\ No newline at end of file diff --git a/llvm/docs/SubsystemDocumentation.rst b/llvm/docs/SubsystemDocumentation.rst index 69764cbd2822a..81feb43f2eeb0 100644 --- a/llvm/docs/SubsystemDocumentation.rst +++ b/llvm/docs/SubsystemDocumentation.rst @@ -13,14 +13,12 @@ For API clients and LLVM developers. BitCodeFormat BlockFrequencyTerminology BranchWeightMetadata - Bugpoint CodeGenerator ExceptionHandling AddingConstrainedIntrinsics LinkTimeOptimization SegmentedStacks TableGenFundamentals - TableGen/index DebuggingJITedCode GoldPlugin MarkedUpDisassembly @@ -28,15 +26,11 @@ For API clients and LLVM developers. SupportLibrary SourceLevelDebugging Vectorizers - WritingAnLLVMBackend - GarbageCollection - WritingAnLLVMPass HowToUseAttributes StackMaps InAlloca BigEndianNEON CoverageMappingFormat - Statepoints MergeFunctions TypeMetadata TransformMetadata @@ -46,28 +40,12 @@ For API clients and LLVM developers. XRay XRayExample XRayFDRFormat - PDB/index CFIVerify SpeculativeLoadHardening StackSafetyAnalysis LoopTerminology DependenceGraphs/index -:doc:`WritingAnLLVMPass` - Information on how to write LLVM transformations and analyses. - -:doc:`WritingAnLLVMBackend` - Information on how to write LLVM backends for machine targets. - -:doc:`CodeGenerator` - The design and implementation of the LLVM code generator. Useful if you are - working on retargetting LLVM to a new architecture, designing a new codegen - pass, or enhancing existing components. - -:doc:`TableGen ` - Describes the TableGen tool, which is used heavily by the LLVM code - generator. - :doc:`AliasAnalysis` Information on how to write a new alias analysis implementation or how to use existing analyses. @@ -90,10 +68,6 @@ For API clients and LLVM developers. Gives the steps necessary when adding a new constrained math intrinsic to LLVM. -:doc:`Bugpoint` - Automatic bug finder and test-case reducer description and usage - information. 
- :doc:`BitCodeFormat` This describes the file format and encoding used for LLVM "bc" files. @@ -169,9 +143,6 @@ For API clients and LLVM developers. :doc:`XRayExample` An example of how to debug an application with XRay. -:doc:`The Microsoft PDB File Format ` - A detailed description of the Microsoft PDB (Program Database) file format. - :doc:`CFIVerify` A description of the verification tool for Control Flow Integrity. @@ -182,21 +153,6 @@ For API clients and LLVM developers. This document describes the design of the stack safety analysis of local variables. -:doc:`LoopTerminology` - A document describing Loops and associated terms as used in LLVM. - :doc:`Dependence Graphs ` A description of the design of the various dependence graphs such as - the DDG (Data Dependence Graph). - -================== -Garbage Collection -================== - -:doc:`GarbageCollection` - The interfaces source-language compilers should use for compiling GC'd - programs. - -:doc:`Statepoints` - This describes a set of experimental extensions for garbage - collection support. + the DDG (Data Dependence Graph). \ No newline at end of file diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index d75769a793899..7016a52380ee3 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -1,13 +1,14 @@ User Guides =========== -For those new to the LLVM system. - NOTE: If you are a user who is only interested in using an LLVM-based compiler, you should look into `Clang `_ instead. The documentation here is intended for users who have a need to work with the intermediate LLVM representation. +.. contents:: + :local: + .. toctree:: :hidden: @@ -22,9 +23,6 @@ intermediate LLVM representation. MarkdownQuickstartTemplate Phabricator Passes - YamlIO - GetElementPtr - Frontend/PerformanceTips MCJITDesignAndImplementation ORCv2 CodeOfConduct @@ -34,27 +32,14 @@ intermediate LLVM representation. 
Docker BuildingADistribution Remarks + WritingAnLLVMPass + WritingAnLLVMBackend + TableGen/index + NVPTXUsage + AMDGPUUsage -Building, Packaging, and Distributing LLVM ------------------------------------------- - -How to build, package, and distribute LLVM. - -===== -CMake -===== - -:doc:`BuildingADistribution` - A best-practices guide for using LLVM's CMake build system to package and - distribute LLVM-based tools. - -:doc:`CMake` - An addendum to the main Getting Started guide for those using the `CMake - build system `_. - -===== Clang -===== +----- :doc:`HowToBuildOnARM` Notes on building and testing LLVM/Clang on ARM. @@ -70,36 +55,58 @@ Clang .. __: http://clang.llvm.org/get_started.html -====== -Docker -====== +LLVM Builds and Distributions +----------------------------- + +:doc:`BuildingADistribution` + A best-practices guide for using LLVM's CMake build system to package and + distribute LLVM-based tools. + +:doc:`CMake` + An addendum to the main Getting Started guide for those using the `CMake + build system `_. :doc:`Docker` A reference for using Dockerfiles provided with LLVM. -================= -Additional Topics -================= +Optimizations +------------- -:doc:`HowToCrossCompileBuiltinsOnArm` - Notes on cross-building and testing the compiler-rt builtins for Arm. +:doc:`WritingAnLLVMPass` + Information on how to write LLVM transformations and analyses. :doc:`Passes` A list of optimizations and analyses implemented in LLVM. -:doc:`TestSuiteGuide` - Describes how to compile and run the test-suite benchmarks. +:doc:`LoopTerminology` + A document describing Loops and associated terms as used in LLVM. -:doc:`YamlIO` - A reference guide for using LLVM's YAML I/O library. +:doc:`Remarks` + A reference on the implementation of remarks in LLVM. -:doc:`GetElementPtr` - Answers to some very frequent questions about LLVM's most frequently - misunderstood instruction. 
+Code Generation +--------------- -:doc:`Frontend/PerformanceTips` - A collection of tips for frontend authors on how to generate IR - which LLVM is able to effectively optimize. +:doc:`WritingAnLLVMBackend` + Information on how to write LLVM backends for machine targets. -:doc:`Remarks` - A reference on the implementation of remarks in LLVM. \ No newline at end of file +:doc:`CodeGenerator` + The design and implementation of the LLVM code generator. Useful if you are + working on retargetting LLVM to a new architecture, designing a new codegen + pass, or enhancing existing components. + +:doc:`TableGen ` + Describes the TableGen tool, which is used heavily by the LLVM code + generator. + +Additional Topics +----------------- + +:doc:`HowToCrossCompileBuiltinsOnArm` + Notes on cross-building and testing the compiler-rt builtins for Arm. + +:doc:`NVPTXUsage` + This document describes using the NVPTX backend to compile GPU kernels. + +:doc:`AMDGPUUsage` + This document describes using the AMDGPU backend to compile GPU kernels. \ No newline at end of file diff --git a/llvm/docs/index.rst b/llvm/docs/index.rst index 17a0706a196bd..f64979ea44ac9 100644 --- a/llvm/docs/index.rst +++ b/llvm/docs/index.rst @@ -53,14 +53,18 @@ Getting Started, How-tos, Developer Guides, and Tutorials. .. toctree:: :hidden: - UserGuides + GettingStartedTutorials ProgrammingDocumentation Reference SubsystemDocumentation + UserGuides -:doc:`UserGuides` +:doc:`GettingStartedTutorials` For those new to the LLVM system. +:doc:`UserGuides` + User guides and How-tos. + :doc:`ProgrammingDocumentation` For developers of applications which use LLVM as a library. @@ -70,29 +74,6 @@ Getting Started, How-tos, Developer Guides, and Tutorials. :doc:`Reference` LLVM and API reference documentation. -Getting Started/Tutorials -------------------------- - -.. 
toctree:: - :hidden: - - GettingStarted - tutorial/index - GettingStartedVS - -:doc:`GettingStarted` - Discusses how to get up and running quickly with the LLVM infrastructure. - Everything from unpacking and compilation of the distribution to execution - of some tools. - -:doc:`tutorial/index` - Tutorials about using LLVM. Includes a tutorial about making a custom - language with LLVM. - -:doc:`GettingStartedVS` - An addendum to the main Getting Started guide for those using Visual Studio - on Windows. - Community ========= From 5c876303ecdc83fbb4ed54281d0f0b180586ca4f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 15:42:25 +0000 Subject: [PATCH 065/254] [X86][SSE] resolveTargetShuffleInputs - call getTargetShuffleInputs instead of using setTargetShuffleZeroElements directly. NFCI. llvm-svn: 373855 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ca770faad133d..abd62d1836d5d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7260,7 +7260,7 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, return true; } -/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs +/// Calls getTargetShuffleInputs to resolve a target shuffle mask's inputs /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the /// remaining input indices in case we now have a unary shuffle and adjust the /// inputs accordingly. 
@@ -7270,10 +7270,9 @@ static bool resolveTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, SmallVectorImpl &Mask, SelectionDAG &DAG, unsigned Depth, bool ResolveZero) { - if (!setTargetShuffleZeroElements(Op, Mask, Inputs, ResolveZero)) - if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, - ResolveZero)) - return false; + if (!getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth, + ResolveZero)) + return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); return true; From de0e3aac2a27496545f809f2dffac48b5ab7f594 Mon Sep 17 00:00:00 2001 From: DeForest Richards Date: Sun, 6 Oct 2019 16:10:11 +0000 Subject: [PATCH 066/254] [Docs] Removes Programming Documentation page Removes Programming Documentation page. Also moves existing topics on Programming Documentation page to User Guides and Reference pages. llvm-svn: 373856 --- llvm/docs/GettingStartedTutorials.rst | 15 ++++++--- llvm/docs/ProgrammingDocumentation.rst | 46 -------------------------- llvm/docs/Reference.rst | 35 +++++++++++++++++--- llvm/docs/UserGuides.rst | 8 +++++ llvm/docs/index.rst | 4 --- 5 files changed, 48 insertions(+), 60 deletions(-) delete mode 100644 llvm/docs/ProgrammingDocumentation.rst diff --git a/llvm/docs/GettingStartedTutorials.rst b/llvm/docs/GettingStartedTutorials.rst index 60a5ddf7c06da..a8d60343ddc5b 100644 --- a/llvm/docs/GettingStartedTutorials.rst +++ b/llvm/docs/GettingStartedTutorials.rst @@ -6,9 +6,10 @@ For those new to the LLVM system. .. toctree:: :hidden: + Frontend/PerformanceTips GettingStarted GettingStartedVS - Frontend/PerformanceTips + ProgrammersManual tutorial/index :doc:`GettingStarted` @@ -20,10 +21,14 @@ For those new to the LLVM system. Tutorials about using LLVM. Includes a tutorial about making a custom language with LLVM. -:doc:`GettingStartedVS` - An addendum to the main Getting Started guide for those using Visual Studio - on Windows. 
+:doc:`ProgrammersManual` + Introduction to the general layout of the LLVM sourcebase, important classes + and APIs, and some tips & tricks. :doc:`Frontend/PerformanceTips` A collection of tips for frontend authors on how to generate IR - which LLVM is able to effectively optimize. \ No newline at end of file + which LLVM is able to effectively optimize. + +:doc:`GettingStartedVS` + An addendum to the main Getting Started guide for those using Visual Studio + on Windows. \ No newline at end of file diff --git a/llvm/docs/ProgrammingDocumentation.rst b/llvm/docs/ProgrammingDocumentation.rst deleted file mode 100644 index a36127a87dd93..0000000000000 --- a/llvm/docs/ProgrammingDocumentation.rst +++ /dev/null @@ -1,46 +0,0 @@ -Programming Documentation -========================= - -For developers of applications which use LLVM as a library. - -.. toctree:: - :hidden: - - Atomics - CommandLine - ExtendingLLVM - HowToSetUpLLVMStyleRTTI - ProgrammersManual - Extensions - ScudoHardenedAllocator - OptBisect - GwpAsan - -:doc:`Atomics` - Information about LLVM's concurrency model. - -:doc:`ProgrammersManual` - Introduction to the general layout of the LLVM sourcebase, important classes - and APIs, and some tips & tricks. - -:doc:`Extensions` - LLVM-specific extensions to tools and formats LLVM seeks compatibility with. - -:doc:`HowToSetUpLLVMStyleRTTI` - How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your - class hierarchy. - -:doc:`ExtendingLLVM` - Look here to see how to add instructions and intrinsics to LLVM. - -:doc:`ScudoHardenedAllocator` - A library that implements a security-hardened `malloc()`. - -:doc:`GwpAsan` - A sampled heap memory error detection toolkit designed for production use. - -:doc:`CommandLine` - Provides information on using the command line parsing library. - -:doc:`OptBisect` - A command line option for debugging optimization-induced failures. 
\ No newline at end of file diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index 9346d4d7608f7..0e55be3026670 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -9,16 +9,22 @@ LLVM and API reference documentation. .. toctree:: :hidden: + Atomics Bugpoint CommandGuide/index CompilerWriterInfo + Extensions FuzzingLLVM GarbageCollection GetElementPtr + GwpAsan + HowToSetUpLLVMStyleRTTI LangRef LibFuzzer MIRLangRef + OptBisect PDB/index + ScudoHardenedAllocator Statepoints TestingGuide YamlIO @@ -38,20 +44,36 @@ LLVM Reference Defines the LLVM intermediate representation and the assembly form of the different nodes. -:doc:`CompilerWriterInfo` - A list of helpful links for compiler writers. - :doc:`Machine IR (MIR) Format Reference Manual ` A reference manual for the MIR serialization format, which is used to test LLVM's code generation passes. -:doc:`YamlIO` - A reference guide for using LLVM's YAML I/O library. +:doc:`Atomics` + Information about LLVM's concurrency model. + +:doc:`CompilerWriterInfo` + A list of helpful links for compiler writers. + +:doc:`Extensions` + LLVM-specific extensions to tools and formats LLVM seeks compatibility with. + +:doc:`HowToSetUpLLVMStyleRTTI` + How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your + class hierarchy. :doc:`GetElementPtr` Answers to some very frequent questions about LLVM's most frequently misunderstood instruction. +:doc:`ScudoHardenedAllocator` + A library that implements a security-hardened `malloc()`. + +:doc:`GwpAsan` + A sampled heap memory error detection toolkit designed for production use. + +:doc:`YamlIO` + A reference guide for using LLVM's YAML I/O library. + ====================== Command Line Utilities ====================== @@ -64,6 +86,9 @@ Command Line Utilities Automatic bug finder and test-case reducer description and usage information. +:doc:`OptBisect` + A command line option for debugging optimization-induced failures. 
+ :doc:`The Microsoft PDB File Format ` A detailed description of the Microsoft PDB (Program Database) file format. diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 7016a52380ee3..ddb3acf6fbd78 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -37,6 +37,8 @@ intermediate LLVM representation. TableGen/index NVPTXUsage AMDGPUUsage + ExtendingLLVM + CommandLine Clang ----- @@ -102,6 +104,12 @@ Code Generation Additional Topics ----------------- +:doc:`CommandLine` + Provides information on using the command line parsing library. + +:doc:`ExtendingLLVM` + Look here to see how to add instructions and intrinsics to LLVM. + :doc:`HowToCrossCompileBuiltinsOnArm` Notes on cross-building and testing the compiler-rt builtins for Arm. diff --git a/llvm/docs/index.rst b/llvm/docs/index.rst index f64979ea44ac9..c5ddabfe0404e 100644 --- a/llvm/docs/index.rst +++ b/llvm/docs/index.rst @@ -54,7 +54,6 @@ Getting Started, How-tos, Developer Guides, and Tutorials. :hidden: GettingStartedTutorials - ProgrammingDocumentation Reference SubsystemDocumentation UserGuides @@ -65,9 +64,6 @@ Getting Started, How-tos, Developer Guides, and Tutorials. :doc:`UserGuides` User guides and How-tos. -:doc:`ProgrammingDocumentation` - For developers of applications which use LLVM as a library. - :doc:`SubsystemDocumentation` For API clients and LLVM developers. From dcb75bf843eae4a7f0d6d72930f49f7d31c07f98 Mon Sep 17 00:00:00 2001 From: Whitney Tsang Date: Sun, 6 Oct 2019 16:39:43 +0000 Subject: [PATCH 067/254] [LOOPGUARD] Remove asserts in getLoopGuardBranch Summary: The assertion in getLoopGuardBranch can be a 'return nullptr' under if condition. 
Authored By: DTharun Reviewer: Whitney, fhahn Reviewed By: Whitney, fhahn Subscribers: fhahn, llvm-commits Tag: LLVM Differential Revision: https://reviews.llvm.org/D66084 llvm-svn: 373857 --- llvm/lib/Analysis/LoopInfo.cpp | 12 +++- llvm/unittests/Analysis/LoopInfoTest.cpp | 85 ++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 1dc63a3c3338f..dbab5db7dbc2d 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -360,11 +360,17 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, } BranchInst *Loop::getLoopGuardBranch() const { - assert(isLoopSimplifyForm() && "Only valid for loop in simplify form"); + if (!isLoopSimplifyForm()) + return nullptr; + BasicBlock *Preheader = getLoopPreheader(); - assert(Preheader && getLoopLatch() && + BasicBlock *Latch = getLoopLatch(); + assert(Preheader && Latch && "Expecting a loop with valid preheader and latch"); - assert(isLoopExiting(getLoopLatch()) && "Only valid for rotated loop"); + + // Loop should be in rotate form. + if (!isLoopExiting(Latch)) + return nullptr; // Disallow loops with more than one unique exit block, as we do not verify // that GuardOtherSucc post dominates all exit blocks. 
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp index 2528078ac67b7..5504ac11240b7 100644 --- a/llvm/unittests/Analysis/LoopInfoTest.cpp +++ b/llvm/unittests/Analysis/LoopInfoTest.cpp @@ -1272,6 +1272,91 @@ TEST(LoopInfoTest, AuxiliaryIV) { }); } +TEST(LoopInfoTest, LoopNotInSimplifyForm) { + const char *ModuleStr = + "define void @foo(i32 %n) {\n" + "entry:\n" + " %guard.cmp = icmp sgt i32 %n, 0\n" + " br i1 %guard.cmp, label %for.cond, label %for.end\n" + "for.cond:\n" + " %i.0 = phi i32 [ 0, %entry ], [ %inc, %latch.1 ], [ %inc, %latch.2 ]\n" + " %inc = add nsw i32 %i.0, 1\n" + " %cmp = icmp slt i32 %i.0, %n\n" + " br i1 %cmp, label %latch.1, label %for.end\n" + "latch.1:\n" + " br i1 undef, label %for.cond, label %latch.2\n" + "latch.2:\n" + " br label %for.cond\n" + "for.end:\n" + " ret void\n" + "}\n"; + + // Parse the module. + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. + BasicBlock *Header = &*(++FI); + assert(Header && "No header"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + EXPECT_FALSE(L->isLoopSimplifyForm()); + // No loop guard because loop in not in simplify form. 
+ EXPECT_EQ(L->getLoopGuardBranch(), nullptr); + EXPECT_FALSE(L->isGuarded()); + }); +} + +TEST(LoopInfoTest, LoopLatchNotExiting) { + const char *ModuleStr = + "define void @foo(i32* %A, i32 %ub) {\n" + "entry:\n" + " %guardcmp = icmp slt i32 0, %ub\n" + " br i1 %guardcmp, label %for.preheader, label %for.end\n" + "for.preheader:\n" + " br label %for.body\n" + "for.body:\n" + " %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n" + " %idxprom = sext i32 %i to i64\n" + " %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n" + " store i32 %i, i32* %arrayidx, align 4\n" + " %inc = add nsw i32 %i, 1\n" + " %cmp = icmp slt i32 %inc, %ub\n" + " br i1 %cmp, label %for.latch, label %for.exit\n" + "for.latch:\n" + " br label %for.body\n" + "for.exit:\n" + " br label %for.end\n" + "for.end:\n" + " ret void\n" + "}\n"; + + // Parse the module. + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runWithLoopInfoPlus( + *M, "foo", + [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // First two basic block are entry and for.preheader - skip them. + ++FI; + BasicBlock *Header = &*(++FI); + BasicBlock *Latch = &*(++FI); + assert(Header && "No header"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + EXPECT_TRUE(L->isLoopSimplifyForm()); + EXPECT_EQ(L->getLoopLatch(), Latch); + EXPECT_FALSE(L->isLoopExiting(Latch)); + // No loop guard becuase loop is not exiting on latch. + EXPECT_EQ(L->getLoopGuardBranch(), nullptr); + EXPECT_FALSE(L->isGuarded()); + }); +} + // Examine getUniqueExitBlocks/getUniqueNonLatchExitBlocks functions. 
TEST(LoopInfoTest, LoopUniqueExitBlocks) { const char *ModuleStr = From 598e7a3a689ace9309595e685ec9af594d15c64c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 6 Oct 2019 18:11:53 +0000 Subject: [PATCH 068/254] gn build: make windows build less broken llvm-svn: 373858 --- llvm/utils/gn/build/toolchain/BUILD.gn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/utils/gn/build/toolchain/BUILD.gn b/llvm/utils/gn/build/toolchain/BUILD.gn index 70e259e116f4b..c36579f28e432 100644 --- a/llvm/utils/gn/build/toolchain/BUILD.gn +++ b/llvm/utils/gn/build/toolchain/BUILD.gn @@ -265,6 +265,7 @@ toolchain("win") { dllfile, libfile, ] + lib_switch = "" default_output_extension = ".dll" restat = true @@ -287,6 +288,7 @@ toolchain("win") { outputs = [ dllfile, ] + lib_switch = "" runtime_outputs = outputs default_output_extension = ".dll" @@ -302,6 +304,7 @@ toolchain("win") { outputs = [ outfile, ] + lib_switch = "" default_output_extension = ".exe" # Setting this allows targets to override the default executable output by From 1e9c0cecab5d282aa618e856373b8bb52f28fb91 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Sun, 6 Oct 2019 18:40:59 +0000 Subject: [PATCH 069/254] [Sema] Avoids an assertion failure when an invalid conversion declaration is used Summary: When using a user-defined conversion function template with a deduced return type the compiler gives a set of warnings: ``` bug.cc:252:44: error: cannot specify any part of a return type in the declaration of a conversion function; use an alias template to declare a conversion to 'auto (Ts &&...) const' template operator auto()(Ts &&... xs) const; ^~~~~~~~~~~~~~~~~~~ bug.cc:252:29: error: conversion function cannot convert to a function type template operator auto()(Ts &&... xs) const; ^ error: pointer to function type cannot have 'const' qualifier ``` after which it triggers an assertion failure. It seems the last error is incorrect and doesn't have any location information. 
This patch stops the compilation after the second warning. Fixes bug 31422. Patch by Mark de Wever! Reviewers: rsmith Reviewed By: rsmith Subscribers: bbannier, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64820 llvm-svn: 373862 --- clang/lib/Sema/SemaDecl.cpp | 3 +++ clang/test/SemaCXX/PR31422.cpp | 7 +++++++ 2 files changed, 10 insertions(+) create mode 100644 clang/test/SemaCXX/PR31422.cpp diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 7a169ba51aa91..6114eb8e8d6e9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8206,6 +8206,9 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, } SemaRef.CheckConversionDeclarator(D, R, SC); + if (D.isInvalidType()) + return nullptr; + IsVirtualOkay = true; return CXXConversionDecl::Create( SemaRef.Context, cast(DC), D.getBeginLoc(), NameInfo, R, diff --git a/clang/test/SemaCXX/PR31422.cpp b/clang/test/SemaCXX/PR31422.cpp new file mode 100644 index 0000000000000..0ac321d7af52c --- /dev/null +++ b/clang/test/SemaCXX/PR31422.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify %s + +// expected-error@+3 {{cannot specify any part of a return type in the declaration of a conversion function; use an alias template to declare a conversion to 'auto (Ts &&...) const'}} +// expected-error@+2 {{conversion function cannot convert to a function type}} +struct S { + template operator auto()(Ts &&... xs) const; +}; From 842dde6be437f5d59304bd10379cf2a61d776b0d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Oct 2019 18:43:03 +0000 Subject: [PATCH 070/254] [LegalizeTypes][X86] When splitting a vselect for type legalization, don't split a setcc condition if the setcc input is legal and vXi1 conditions are supported Summary: The VSELECT splitting code tries to split a setcc input as well. 
But on avx512 where mask registers are well supported it should be better to just split the mask and use a single compare. Reviewers: RKSimon, spatel, efriedma Reviewed By: spatel Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68359 llvm-svn: 373863 --- .../SelectionDAG/LegalizeTypesGeneric.cpp | 15 ++- llvm/test/CodeGen/X86/avx512-vselect.ll | 59 ++++----- .../CodeGen/X86/min-legal-vector-width.ll | 20 +-- llvm/test/CodeGen/X86/pr34177.ll | 121 ++++++++++++------ 4 files changed, 122 insertions(+), 93 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 560b5729e3def..5562f400b6e1d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -521,9 +521,18 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { GetSplitVector(Cond, CL, CH); // It seems to improve code to generate two narrow SETCCs as opposed to // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - SplitVecRes_SETCC(Cond.getNode(), CL, CH); - else + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. 
+ EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll index 7ee4e6674e0e7..07e5aeac015bc 100644 --- a/llvm/test/CodeGen/X86/avx512-vselect.ll +++ b/llvm/test/CodeGen/X86/avx512-vselect.ll @@ -51,10 +51,9 @@ entry: define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) { ; CHECK-SKX-LABEL: test3: ; CHECK-SKX: # %bb.0: -; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1] -; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1 -; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2 -; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2} +; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1 ; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1} ; CHECK-SKX-NEXT: retq ; @@ -76,10 +75,9 @@ define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) { define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) { ; CHECK-SKX-LABEL: test4: ; CHECK-SKX: # %bb.0: -; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5 -; CHECK-SKX-NEXT: vptestnmw %xmm5, %xmm5, %k1 -; CHECK-SKX-NEXT: vptestnmw %xmm0, %xmm0, %k2 -; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2} +; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1 ; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1} ; CHECK-SKX-NEXT: retq ; @@ -99,23 +97,13 @@ define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) { } define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) { -; CHECK-SKX-LABEL: test5: -; CHECK-SKX: # 
%bb.0: -; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5 -; CHECK-SKX-NEXT: vptestnmd %ymm5, %ymm5, %k1 -; CHECK-SKX-NEXT: vptestnmd %ymm0, %ymm0, %k2 -; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2} -; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1} -; CHECK-SKX-NEXT: retq -; -; CHECK-KNL-LABEL: test5: -; CHECK-KNL: # %bb.0: -; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5 -; CHECK-KNL-NEXT: vptestnmd %zmm5, %zmm5, %k1 -; CHECK-KNL-NEXT: vptestnmd %zmm0, %zmm0, %k2 -; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2} -; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1} -; CHECK-KNL-NEXT: retq +; CHECK-LABEL: test5: +; CHECK: # %bb.0: +; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; CHECK-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-NEXT: kshiftrw $8, %k1, %k1 +; CHECK-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1} +; CHECK-NEXT: retq %c = icmp eq <16 x i32> %x, zeroinitializer %ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b ret <16 x i64> %ret @@ -124,10 +112,9 @@ define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) { define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) { ; CHECK-SKX-LABEL: test6: ; CHECK-SKX: # %bb.0: -; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5 -; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1 -; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2 -; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2} +; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1 ; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1} ; CHECK-SKX-NEXT: retq ; @@ -151,10 +138,9 @@ define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) { define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) { ; CHECK-SKX-LABEL: test7: ; CHECK-SKX: # %bb.0: -; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5 -; CHECK-SKX-NEXT: vptestnmw %ymm5, %ymm5, %k1 -; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k2 -; CHECK-SKX-NEXT: vpblendmd 
%zmm1, %zmm3, %zmm0 {%k2} +; CHECK-SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1 ; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1} ; CHECK-SKX-NEXT: retq ; @@ -179,10 +165,9 @@ define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) { define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) { ; CHECK-SKX-LABEL: test8: ; CHECK-SKX: # %bb.0: -; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5 -; CHECK-SKX-NEXT: vptestnmb %ymm5, %ymm5, %k1 -; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k2 -; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k2} +; CHECK-SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k1} +; CHECK-SKX-NEXT: kshiftrq $32, %k1, %k1 ; CHECK-SKX-NEXT: vpblendmw %zmm2, %zmm4, %zmm1 {%k1} ; CHECK-SKX-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 46e73c1f8542b..88329600b2336 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1013,9 +1013,7 @@ define void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 x i64>* %p ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 ; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3 ; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2 +; CHECK-NEXT: kshiftrb $4, %k1, %k2 ; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2} ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1} ; CHECK-NEXT: vmovdqa %ymm2, (%rdx) @@ -1035,10 +1033,8 @@ define void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, <8 x i64>* %p ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 ; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1 -; CHECK-NEXT: vextracti128 $1, %ymm0, 
%xmm0 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k2 +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 +; CHECK-NEXT: kshiftrb $4, %k1, %k2 ; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2} ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1} ; CHECK-NEXT: vmovdqa %ymm2, (%rdx) @@ -1059,9 +1055,7 @@ define void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 x i32>* % ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 ; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3 ; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k2 +; CHECK-NEXT: kshiftrw $8, %k1, %k2 ; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2} ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1} ; CHECK-NEXT: vmovdqa %ymm2, (%rdx) @@ -1081,10 +1075,8 @@ define void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, <16 x i32> ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 ; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2 +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 +; CHECK-NEXT: kshiftrw $8, %k1, %k2 ; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2} ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1} ; CHECK-NEXT: vmovdqa %ymm2, (%rdx) diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll index 056682bb2750b..f8ead6352f1d3 100644 --- a/llvm/test/CodeGen/X86/pr34177.ll +++ b/llvm/test/CodeGen/X86/pr34177.ll @@ -6,45 +6,88 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unnamed_addr { -; CHECK-LABEL: test: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovq %xmm0, %rax -; CHECK-NEXT: vpextrq $1, %xmm0, %rcx -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vmovq %xmm0, %rdx -; CHECK-NEXT: vpextrq $1, %xmm0, 
%rsi -; CHECK-NEXT: cmpq $3, %rsi -; CHECK-NEXT: fld1 -; CHECK-NEXT: fldz -; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fcmove %st(2), %st -; CHECK-NEXT: cmpq $2, %rdx -; CHECK-NEXT: fld %st(1) -; CHECK-NEXT: fcmove %st(3), %st -; CHECK-NEXT: cmpq $1, %rcx -; CHECK-NEXT: fld %st(2) -; CHECK-NEXT: fcmove %st(4), %st -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: fxch %st(3) -; CHECK-NEXT: fcmove %st(4), %st -; CHECK-NEXT: fstp %st(4) -; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) -; CHECK-NEXT: fstpt 70(%rdi) -; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) -; CHECK-NEXT: fstpt 50(%rdi) -; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) -; CHECK-NEXT: fstpt 30(%rdi) -; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) -; CHECK-NEXT: fstpt 10(%rdi) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fadd %st, %st(0) -; CHECK-NEXT: fstpt 60(%rdi) -; CHECK-NEXT: fadd %st, %st(0) -; CHECK-NEXT: fstpt 40(%rdi) -; CHECK-NEXT: fadd %st, %st(0) -; CHECK-NEXT: fstpt 20(%rdi) -; CHECK-NEXT: fadd %st, %st(0) -; CHECK-NEXT: fstpt (%rdi) +; AVX512F-LABEL: test: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, %rdx +; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512F-NEXT: cmpq $3, %rsi +; AVX512F-NEXT: fld1 +; AVX512F-NEXT: fldz +; AVX512F-NEXT: fld %st(0) +; AVX512F-NEXT: fcmove %st(2), %st +; AVX512F-NEXT: cmpq $2, %rdx +; AVX512F-NEXT: fld %st(1) +; AVX512F-NEXT: fcmove %st(3), %st +; AVX512F-NEXT: cmpq $1, %rcx +; AVX512F-NEXT: fld %st(2) +; AVX512F-NEXT: fcmove %st(4), %st +; AVX512F-NEXT: testq %rax, %rax +; AVX512F-NEXT: fxch %st(3) +; AVX512F-NEXT: fcmove %st(4), %st +; AVX512F-NEXT: fstp %st(4) +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fstpt 70(%rdi) +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fstpt 50(%rdi) +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fstpt 30(%rdi) +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fstpt 10(%rdi) +; AVX512F-NEXT: fxch %st(1) +; 
AVX512F-NEXT: fadd %st, %st(0) +; AVX512F-NEXT: fstpt 60(%rdi) +; AVX512F-NEXT: fadd %st, %st(0) +; AVX512F-NEXT: fstpt 40(%rdi) +; AVX512F-NEXT: fadd %st, %st(0) +; AVX512F-NEXT: fstpt 20(%rdi) +; AVX512F-NEXT: fadd %st, %st(0) +; AVX512F-NEXT: fstpt (%rdi) +; +; AVX512VL-LABEL: test: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpcmpeqq {{.*}}(%rip), %ymm0, %k0 +; AVX512VL-NEXT: kshiftrb $2, %k0, %k1 +; AVX512VL-NEXT: kshiftrb $1, %k0, %k2 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: testb $1, %al +; AVX512VL-NEXT: fld1 +; AVX512VL-NEXT: fldz +; AVX512VL-NEXT: fld %st(0) +; AVX512VL-NEXT: fcmovne %st(2), %st +; AVX512VL-NEXT: kshiftrb $1, %k1, %k0 +; AVX512VL-NEXT: kmovd %k0, %eax +; AVX512VL-NEXT: testb $1, %al +; AVX512VL-NEXT: fld %st(1) +; AVX512VL-NEXT: fcmovne %st(3), %st +; AVX512VL-NEXT: kmovd %k1, %eax +; AVX512VL-NEXT: testb $1, %al +; AVX512VL-NEXT: fld %st(2) +; AVX512VL-NEXT: fcmovne %st(4), %st +; AVX512VL-NEXT: kmovd %k2, %eax +; AVX512VL-NEXT: testb $1, %al +; AVX512VL-NEXT: fxch %st(3) +; AVX512VL-NEXT: fcmovne %st(4), %st +; AVX512VL-NEXT: fstp %st(4) +; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512VL-NEXT: fstpt 70(%rdi) +; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512VL-NEXT: fstpt 50(%rdi) +; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512VL-NEXT: fstpt 30(%rdi) +; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512VL-NEXT: fstpt 10(%rdi) +; AVX512VL-NEXT: fxch %st(1) +; AVX512VL-NEXT: fadd %st, %st(0) +; AVX512VL-NEXT: fstpt (%rdi) +; AVX512VL-NEXT: fadd %st, %st(0) +; AVX512VL-NEXT: fstpt 60(%rdi) +; AVX512VL-NEXT: fadd %st, %st(0) +; AVX512VL-NEXT: fstpt 40(%rdi) +; AVX512VL-NEXT: fadd %st, %st(0) +; AVX512VL-NEXT: fstpt 20(%rdi) %1 = icmp eq <4 x i64> , %a %2 = select <4 x i1> %1, <4 x x86_fp80> , <4 x x86_fp80> zeroinitializer %3 = fadd <4 x x86_fp80> %2, %2 From 570ae49d030c987fd68fc81896301411d87b10f9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Oct 2019 18:43:08 +0000 Subject: [PATCH 071/254] [X86] Add custom type 
legalization for v16i64->v16i8 truncate and v8i64->v8i8 truncate when v8i64 isn't legal Summary: The default legalization for v16i64->v16i8 tries to create a multiple stage truncate concatenating after each stage and truncating again. But avx512 implements truncates with multiple uops. So it should be better to truncate all the way to the desired element size and then concatenate the pieces using unpckl instructions. This minimizes the number of 2 uop truncates. The unpcks are all single uop instructions. I tried to handle this by just custom splitting the v16i64->v16i8 shuffle. And hoped that the DAG combiner would leave the two halves in the state needed to make D68374 do the job for each half. This worked for the first half, but the second half got messed up. So I've implemented custom handling for v8i64->v8i8 when v8i64 needs to be split to produce the VTRUNCs directly. Reviewers: RKSimon, spatel Reviewed By: RKSimon Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68428 llvm-svn: 373864 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 26 +++++++- .../CodeGen/X86/min-legal-vector-width.ll | 41 +++++------- llvm/test/CodeGen/X86/vector-trunc-packus.ll | 65 +++++++++++++++---- llvm/test/CodeGen/X86/vector-trunc-ssat.ll | 59 +++++++++++++---- llvm/test/CodeGen/X86/vector-trunc-usat.ll | 50 ++++++++++---- 5 files changed, 173 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index abd62d1836d5d..44cd5ef63d793 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1763,6 +1763,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); } // We want to custom lower some of our intrinsics. 
@@ -19329,9 +19330,11 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); - // If called by the legalizer just return. - if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT)) { - if ((InVT == MVT::v8i64 || InVT == MVT::v16i32) && VT.is128BitVector()) { + // If we're called by the type legalizer, handle a few cases. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(InVT)) { + if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && + VT.is128BitVector()) { assert(Subtarget.hasVLX() && "Unexpected subtarget!"); // The default behavior is to truncate one step, concatenate, and then // truncate the remainder. We'd rather produce two 64-bit results and @@ -27958,6 +27961,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } } + if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && + getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && + isTypeLegal(MVT::v4i64)) { + // Input needs to be split and output needs to widened. Let's use two + // VTRUNCs, and shuffle their results together into the wider type. 
+ SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(In, dl); + + Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); + Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); + SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, + { 0, 1, 2, 3, 16, 17, 18, 19, + -1, -1, -1, -1, -1, -1, -1, -1 }); + Results.push_back(Res); + return; + } + return; } case ISD::ANY_EXTEND: diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 88329600b2336..e3c66e83c83f6 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -797,14 +797,12 @@ define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector- ; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1 ; CHECK-NEXT: vmovdqa 64(%rdi), %ymm2 ; CHECK-NEXT: vmovdqa 96(%rdi), %ymm3 -; CHECK-NEXT: vpmovqd %ymm2, %xmm2 -; CHECK-NEXT: vpmovqd %ymm3, %xmm3 -; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; CHECK-NEXT: vpmovdb %ymm2, %xmm2 -; CHECK-NEXT: vpmovqd %ymm0, %xmm0 -; CHECK-NEXT: vpmovqd %ymm1, %xmm1 -; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; CHECK-NEXT: vpmovdb %ymm0, %xmm0 +; CHECK-NEXT: vpmovqb %ymm3, %xmm3 +; CHECK-NEXT: vpmovqb %ymm2, %xmm2 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; CHECK-NEXT: vpmovqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -829,24 +827,15 @@ define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %x) nounwind "min-legal-vector- } define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { -; CHECK-AVX512-LABEL: trunc_v8i64_v8i8: -; CHECK-AVX512: # %bb.0: -; CHECK-AVX512-NEXT: vmovdqa (%rdi), %ymm0 -; CHECK-AVX512-NEXT: vmovdqa 32(%rdi), %ymm1 -; CHECK-AVX512-NEXT: vpmovqb %ymm1, %xmm1 -; CHECK-AVX512-NEXT: vpmovqb 
%ymm0, %xmm0 -; CHECK-AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-AVX512-NEXT: vzeroupper -; CHECK-AVX512-NEXT: retq -; -; CHECK-VBMI-LABEL: trunc_v8i64_v8i8: -; CHECK-VBMI: # %bb.0: -; CHECK-VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4048780183313844224,4048780183313844224,4048780183313844224,4048780183313844224] -; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 -; CHECK-VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; CHECK-VBMI-NEXT: vzeroupper -; CHECK-VBMI-NEXT: retq +; CHECK-LABEL: trunc_v8i64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm1 +; CHECK-NEXT: vpmovqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq %a = load <8 x i64>, <8 x i64>* %x %b = trunc <8 x i64> %a to <8 x i8> ret <8 x i8> %b diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index 8b777e9805a35..b0d6a20bdf38b 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -2732,20 +2732,57 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_packus_v16i64_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] -; AVX512-NEXT: vpminsq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpminsq %zmm2, %zmm1, %zmm1 -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_packus_v16i64_v16i8: +; AVX512F: # %bb.0: +; 
AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpminsq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpminsq %zmm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v16i64_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512VL-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v16i64_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] +; AVX512BW-NEXT: vpminsq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpminsq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v16i64_v16i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BWVL-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512BWVL-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp slt <16 x i64> %a0, %2 = select <16 x i1> 
%1, <16 x i64> %a0, <16 x i64> %3 = icmp sgt <16 x i64> %2, zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index 8e99c0b64a0d4..774a478a5d72d 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -2717,20 +2717,51 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64> %a0) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_ssat_v16i64_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpminsq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpminsq %zmm2, %zmm1, %zmm1 -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] -; AVX512-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_ssat_v16i64_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [127,127,127,127,127,127,127,127] +; AVX512F-NEXT: vpminsq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpminsq %zmm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] +; AVX512F-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v16i64_v16i8: +; 
AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsqb %zmm1, %xmm1 +; AVX512VL-NEXT: vpmovsqb %zmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v16i64_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm2 = [127,127,127,127,127,127,127,127] +; AVX512BW-NEXT: vpminsq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpminsq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] +; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v16i64_v16i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsqb %zmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovsqb %zmm0, %xmm0 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp slt <16 x i64> %a0, %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> %3 = icmp sgt <16 x i64> %2, diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 1b38cc43ab114..7489d393585ff 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -1842,17 +1842,45 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(<16 x i64> %a0) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_usat_v16i64_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] -; AVX512-NEXT: vpminuq %zmm2, %zmm1, %zmm1 -; AVX512-NEXT: vpminuq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd 
%zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_usat_v16i64_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpminuq %zmm2, %zmm1, %zmm1 +; AVX512F-NEXT: vpminuq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v16i64_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512VL-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v16i64_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255] +; AVX512BW-NEXT: vpminuq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpminuq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v16i64_v16i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = icmp ult <16 x i64> %a0, %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> %3 = trunc <16 x i64> %2 to <16 x i8> From 344df110e56fa97fa7225f78a3386636a2da7939 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Sun, 6 Oct 2019 18:50:40 +0000 Subject: [PATCH 072/254] Implements CWG 1601 in [over.ics.rank/4.2] Summary: The overload resolution 
for enums with a fixed underlying type has changed in the C++14 standard. This patch implements the new rule. Patch by Mark de Wever! Reviewers: rsmith Reviewed By: rsmith Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D65695 llvm-svn: 373866 --- clang/lib/Sema/SemaOverload.cpp | 42 +++++++++++++++++++++++++++++++++ clang/test/CXX/drs/dr16xx.cpp | 12 ++++++++++ clang/test/CXX/drs/dr6xx.cpp | 15 ++++++++---- clang/www/cxx_dr_status.html | 2 +- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3db8e3d2e5c83..80bd3562bc7fd 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -3765,6 +3765,34 @@ isBetterReferenceBindingKind(const StandardConversionSequence &SCS1, !SCS2.IsLvalueReference && SCS2.BindsToFunctionLvalue); } +enum class FixedEnumPromotion { + None, + ToUnderlyingType, + ToPromotedUnderlyingType +}; + +/// Returns kind of fixed enum promotion the \a SCS uses. +static FixedEnumPromotion +getFixedEnumPromtion(Sema &S, const StandardConversionSequence &SCS) { + + if (SCS.Second != ICK_Integral_Promotion) + return FixedEnumPromotion::None; + + QualType FromType = SCS.getFromType(); + if (!FromType->isEnumeralType()) + return FixedEnumPromotion::None; + + EnumDecl *Enum = FromType->getAs()->getDecl(); + if (!Enum->isFixed()) + return FixedEnumPromotion::None; + + QualType UnderlyingType = Enum->getIntegerType(); + if (S.Context.hasSameType(SCS.getToType(1), UnderlyingType)) + return FixedEnumPromotion::ToUnderlyingType; + + return FixedEnumPromotion::ToPromotedUnderlyingType; +} + /// CompareStandardConversionSequences - Compare two standard /// conversion sequences to determine whether one is better than the /// other or if they are indistinguishable (C++ 13.3.3.2p3). @@ -3806,6 +3834,20 @@ CompareStandardConversionSequences(Sema &S, SourceLocation Loc, ? 
ImplicitConversionSequence::Better : ImplicitConversionSequence::Worse; + // C++14 [over.ics.rank]p4b2: + // This is retroactively applied to C++11 by CWG 1601. + // + // A conversion that promotes an enumeration whose underlying type is fixed + // to its underlying type is better than one that promotes to the promoted + // underlying type, if the two are different. + FixedEnumPromotion FEP1 = getFixedEnumPromtion(S, SCS1); + FixedEnumPromotion FEP2 = getFixedEnumPromtion(S, SCS2); + if (FEP1 != FixedEnumPromotion::None && FEP2 != FixedEnumPromotion::None && + FEP1 != FEP2) + return FEP1 == FixedEnumPromotion::ToUnderlyingType + ? ImplicitConversionSequence::Better + : ImplicitConversionSequence::Worse; + // C++ [over.ics.rank]p4b2: // // If class B is derived directly or indirectly from class A, diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp index b5047e8fe2ce4..44d5b8f3f5b63 100644 --- a/clang/test/CXX/drs/dr16xx.cpp +++ b/clang/test/CXX/drs/dr16xx.cpp @@ -23,6 +23,18 @@ namespace std { } // std #endif +namespace dr1601 { // dr1601: 10 +enum E : char { e }; +#if __cplusplus < 201103L + // expected-error@-2 {{enumeration types with a fixed underlying type are a C++11 extension}} +#endif +void f(char); +void f(int); +void g() { + f(e); +} +} // namespace dr1601 + namespace dr1611 { // dr1611: dup 1658 struct A { A(int); }; struct B : virtual A { virtual void f() = 0; }; diff --git a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp index 530c88f86f888..31e3571f500aa 100644 --- a/clang/test/CXX/drs/dr6xx.cpp +++ b/clang/test/CXX/drs/dr6xx.cpp @@ -987,14 +987,19 @@ namespace dr684 { // dr684: sup 1454 } #endif -#if __cplusplus >= 201103L namespace dr685 { // dr685: yes enum E : long { e }; +#if __cplusplus < 201103L + // expected-error@-2 {{enumeration types with a fixed underlying type are a C++11 extension}} +#endif void f(int); int f(long); int a = f(e); enum G : short { g }; +#if __cplusplus < 201103L + // expected-error@-2 
{{enumeration types with a fixed underlying type are a C++11 extension}} +#endif int h(short); void h(long); int b = h(g); @@ -1007,11 +1012,11 @@ namespace dr685 { // dr685: yes void j(long); // expected-note {{candidate}} int d = j(g); // expected-error {{ambiguous}} - int k(short); // expected-note {{candidate}} - void k(int); // expected-note {{candidate}} - int x = k(g); // expected-error {{ambiguous}} + // Valid per dr1601 + int k(short); + void k(int); + int x = k(g); } -#endif namespace dr686 { // dr686: yes void f() { diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 40af4732ef600..83567e29f4897 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -9421,7 +9421,7 @@

C++ defect report implementation status

1601 C++14 Promotion of enumeration with fixed underlying type - Unknown + SVN 1602 From 42010dc8108f818d7fe8314116ad6805bc121c1e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 19:06:45 +0000 Subject: [PATCH 073/254] [X86][SSE] Don't merge known undef/zero elements into target shuffle masks. Replaces setTargetShuffleZeroElements with getTargetShuffleAndZeroables which reports the Zeroable elements but doesn't merge them into the decoded target shuffle mask (the merging has been moved up into getTargetShuffleInputs until we can get rid of it entirely). This is part of the work to fix PR43024 and allow us to use SimplifyDemandedElts to simplify shuffle chains - we need to get to a point where the target shuffle mask isn't adjusted by its source inputs but instead we cache them in a parallel Zeroable mask. llvm-svn: 373867 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 80 +++++++++++++++---------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 44cd5ef63d793..52b3a723107f7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6726,14 +6726,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return true; } -/// Check a target shuffle mask's inputs to see if we can set any values to -/// SM_SentinelZero - this is for elements that are known to be zero -/// (not just zeroable) from their inputs. +/// Decode a target shuffle mask and inputs and see if any values are +/// known to be undef or zero from their inputs. /// Returns true if the target shuffle mask was decoded. 
-static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask, +static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, SmallVectorImpl &Ops, - bool ResolveZero = true) { + APInt &KnownUndef, APInt &KnownZero) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; @@ -6742,15 +6740,17 @@ static bool setTargetShuffleZeroElements(SDValue N, if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) return false; + int Size = Mask.size(); SDValue V1 = Ops[0]; SDValue V2 = IsUnary ? V1 : Ops[1]; + KnownUndef = KnownZero = APInt::getNullValue(Size); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); assert((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"); - unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); + unsigned EltSizeInBits = VT.getSizeInBits() / Size; // Extract known constant input data. APInt UndefSrcElts[2]; @@ -6761,12 +6761,18 @@ static bool setTargetShuffleZeroElements(SDValue N, getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], SrcEltBits[1], true, false)}; - for (int i = 0, Size = Mask.size(); i < Size; ++i) { + for (int i = 0; i < Size; ++i) { int M = Mask[i]; // Already decoded as SM_SentinelZero / SM_SentinelUndef. - if (M < 0) + if (M < 0) { + assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!"); + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); continue; + } // Determine shuffle input and normalize the mask. unsigned SrcIdx = M / Size; @@ -6775,7 +6781,7 @@ static bool setTargetShuffleZeroElements(SDValue N, // We are referencing an UNDEF input. 
if (V.isUndef()) { - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); continue; } @@ -6788,22 +6794,22 @@ static bool setTargetShuffleZeroElements(SDValue N, int Scale = Size / V.getValueType().getVectorNumElements(); int Idx = M / Scale; if (Idx != 0 && !VT.isFloatingPoint()) - Mask[i] = SM_SentinelUndef; - else if (ResolveZero && Idx == 0 && X86::isZeroNode(V.getOperand(0))) - Mask[i] = SM_SentinelZero; + KnownUndef.setBit(i); + else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) + KnownZero.setBit(i); continue; } // Attempt to extract from the source's constant bits. if (IsSrcConstant[SrcIdx]) { if (UndefSrcElts[SrcIdx][M]) - Mask[i] = SM_SentinelUndef; - else if (ResolveZero && SrcEltBits[SrcIdx][M] == 0) - Mask[i] = SM_SentinelZero; + KnownUndef.setBit(i); + else if (SrcEltBits[SrcIdx][M] == 0) + KnownZero.setBit(i); } } - assert(VT.getVectorNumElements() == Mask.size() && + assert(VT.getVectorNumElements() == Size && "Different mask size from vector size!"); return true; } @@ -7246,19 +7252,29 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, Inputs = UsedInputs; } -/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs -/// and set the SM_SentinelUndef and SM_SentinelZero values. +/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs +/// and then sets the SM_SentinelUndef and SM_SentinelZero values. /// Returns true if the target shuffle mask was decoded. 
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, SmallVectorImpl &Inputs, SmallVectorImpl &Mask, SelectionDAG &DAG, unsigned Depth, bool ResolveZero) { - if (!setTargetShuffleZeroElements(Op, Mask, Inputs, ResolveZero)) - if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, - ResolveZero)) - return false; - return true; + APInt KnownUndef, KnownZero; + if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { + for (int i = 0, e = Mask.size(); i != e; ++i) { + int &M = Mask[i]; + if (M < 0) + continue; + if (KnownUndef[i]) + M = SM_SentinelUndef; + else if (ResolveZero && KnownZero[i]) + M = SM_SentinelZero; + } + return true; + } + return getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, + ResolveZero); } /// Calls getTargetShuffleInputs to resolve a target shuffle mask's inputs @@ -33552,15 +33568,17 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // Attempt to merge insertps Op1 with an inner target shuffle node. SmallVector TargetMask1; SmallVector Ops1; - if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) { - int M = TargetMask1[SrcIdx]; - if (isUndefOrZero(M)) { + APInt KnownUndef1, KnownZero1; + if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1, + KnownZero1)) { + if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) { // Zero/UNDEF insertion - zero out element and remove dependency. InsertPSMask |= (1u << DstIdx); return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Update insertps mask srcidx and reference the source input directly. + int M = TargetMask1[SrcIdx]; assert(0 <= M && M < 8 && "Shuffle index out of range"); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); Op1 = Ops1[M < 4 ? 0 : 1]; @@ -33571,16 +33589,17 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // Attempt to merge insertps Op0 with an inner target shuffle node. 
SmallVector TargetMask0; SmallVector Ops0; - if (setTargetShuffleZeroElements(Op0, TargetMask0, Ops0)) { + APInt KnownUndef0, KnownZero0; + if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0, + KnownZero0)) { bool Updated = false; bool UseInput00 = false; bool UseInput01 = false; for (int i = 0; i != 4; ++i) { - int M = TargetMask0[i]; if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { // No change if element is already zero or the inserted element. continue; - } else if (isUndefOrZero(M)) { + } else if (KnownUndef0[i] || KnownZero0[i]) { // If the target mask is undef/zero then we must zero the element. InsertPSMask |= (1u << i); Updated = true; @@ -33588,6 +33607,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } // The input vector element must be inline. + int M = TargetMask0[i]; if (M != i && M != (i + 4)) return SDValue(); From 739c9f0b79335edf8810b15e67d43a5b41ab1cd5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 19:07:00 +0000 Subject: [PATCH 074/254] [X86][SSE] Remove resolveTargetShuffleInputs and use getTargetShuffleInputs directly. Move the resolveTargetShuffleInputsAndMask call to after the shuffle mask combine before the undef/zero constant fold instead. llvm-svn: 373868 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 64 +++++++++---------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 52b3a723107f7..7ffe3971b6f83 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6816,11 +6816,10 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, // Forward declaration (for getFauxShuffleMask recursive check). // TODO: Use DemandedElts variant. 
-static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG, unsigned Depth, - bool ResolveZero); +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth, + bool ResolveZero); // Attempt to decode ops that could be represented as a shuffle mask. // The decoded shuffle mask may contain a different number of elements to the @@ -6923,10 +6922,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, return false; SmallVector SrcMask0, SrcMask1; SmallVector SrcInputs0, SrcInputs1; - if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1, - ResolveZero) || - !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1, - ResolveZero)) + if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1, + ResolveZero) || + !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1, + ResolveZero)) return false; int MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; @@ -6975,8 +6974,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)). SmallVector SubMask; SmallVector SubInputs; - if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, - SubMask, DAG, Depth + 1, ResolveZero)) + if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, + SubMask, DAG, Depth + 1, ResolveZero)) return false; if (SubMask.size() != NumSubElts) { assert(((SubMask.size() % NumSubElts) == 0 || @@ -7277,33 +7276,14 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, ResolveZero); } -/// Calls getTargetShuffleInputs to resolve a target shuffle mask's inputs -/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the -/// remaining input indices in case we now have a unary shuffle and adjust the -/// inputs accordingly. 
-/// Returns true if the target shuffle mask was decoded. -static bool resolveTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG, unsigned Depth, - bool ResolveZero) { - if (!getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth, - ResolveZero)) - return false; - - resolveTargetShuffleInputsAndMask(Inputs, Mask); - return true; -} - -static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG, unsigned Depth, - bool ResolveZero = true) { +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth = 0, + bool ResolveZero = true) { unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); - return resolveTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth, - ResolveZero); + return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth, + ResolveZero); } /// Returns the scalar element that will make up the ith @@ -33006,7 +32986,7 @@ static SDValue combineX86ShufflesRecursively( // Extract target shuffle mask and resolve sentinels and inputs. SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG, Depth)) + if (!getTargetShuffleInputs(Op, OpInputs, OpMask, DAG, Depth)) return SDValue(); // Add the inputs to the Ops list, avoiding duplicates. @@ -33109,6 +33089,9 @@ static SDValue combineX86ShufflesRecursively( Mask[i] = OpMaskedIdx; } + // Remove unused/repeated shuffle source ops. + resolveTargetShuffleInputsAndMask(Ops, Mask); + // Handle the all undef/zero cases early. 
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) return DAG.getUNDEF(Root.getValueType()); @@ -33120,10 +33103,7 @@ static SDValue combineX86ShufflesRecursively( return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, SDLoc(Root)); - // Remove unused/repeated shuffle source ops. - resolveTargetShuffleInputsAndMask(Ops, Mask); assert(!Ops.empty() && "Shuffle with no inputs detected"); - HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); // Update the list of shuffle nodes that have been combined so far. @@ -34962,7 +34942,7 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( SmallVector ShuffleMask; SmallVector ShuffleOps; if (VT.isSimple() && VT.isVector() && - resolveTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) { + getTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) { // If all the demanded elts are from one operand and are inline, // then we can use the operand directly. int NumOps = ShuffleOps.size(); @@ -36041,7 +36021,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG, 0)) + if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. From e36415cacffb38cd642044b8fb28a3936b058fef Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Sun, 6 Oct 2019 19:45:53 +0000 Subject: [PATCH 075/254] [NFC][PowerPC] Reorganize CRNotPat multiclass patterns in PPCInstrInfo.td This is patch aims to group together the `CRNotPat` multi class instantiations within the `PPCInstrInfo.td` file. Integer instantiations of the multi class are grouped together into a section, and the floating point patterns are separated into its own section. 
Differential Revision: https://reviews.llvm.org/D67975 llvm-svn: 373869 --- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 175 ++++++++++++------------ 1 file changed, 91 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 096eb1e0175ce..24183277519b6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3575,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; - -defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; - def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), @@ -3603,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), - (EXTRACT_SUBREG 
(CMPLW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; - // SETCC for i64. def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; @@ -3643,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i32. 
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i64. 
defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), @@ -3660,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), @@ -3682,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; +let Predicates = [HasFPU] in { +// Instantiations of CRNotPat for f32. +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f64. 
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f128. +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; +} + // SETCC for f32. 
let Predicates = [HasFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), @@ -3699,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; - // SETCC for f64. def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; @@ -3730,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; - // SETCC for f128. 
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; @@ -3761,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; - } // This must be in this file because it relies on patterns defined in this file From d84cd7caa87e8486a131c76f45595535678c6552 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 19:54:20 +0000 Subject: [PATCH 076/254] Fix signed/unsigned warning. 
NFCI llvm-svn: 373870 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7ffe3971b6f83..784bf6d58c55f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6809,7 +6809,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, } } - assert(VT.getVectorNumElements() == Size && + assert(VT.getVectorNumElements() == (unsigned)Size && "Different mask size from vector size!"); return true; } From b4ba3cbda01e710e64948f43cbf9bfdec5ec5855 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Oct 2019 21:11:45 +0000 Subject: [PATCH 077/254] [X86][AVX] Access a scalar float/double as a free extract from a broadcast load (PR43217) If a fp scalar is loaded and then used as both a scalar and a vector broadcast, perform the load as a broadcast and then extract the scalar for 'free' from the 0th element. This involved switching the order of the X86ISD::BROADCAST combines so we only convert to X86ISD::BROADCAST_LOAD once all other canonicalizations have been attempted. Adds a DAGCombinerInfo::recursivelyDeleteUnusedNodes wrapper. 
Fixes PR43217 Differential Revision: https://reviews.llvm.org/D68544 llvm-svn: 373871 --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++ llvm/lib/Target/X86/X86ISelLowering.cpp | 35 +++++++++----- llvm/test/CodeGen/X86/avx-vbroadcast.ll | 47 +++++++------------ 4 files changed, 47 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 999c0ea30223e..a5dfb8b86a879 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3263,6 +3263,8 @@ class TargetLowering : public TargetLoweringBase { SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); + bool recursivelyDeleteUnusedNodes(SDNode *N); + void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 38fd9742d2d3e..7ea908437ff91 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -761,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 784bf6d58c55f..915046048ff15 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33429,8 +33429,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (Src.getOpcode() == 
ISD::SCALAR_TO_VECTOR) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); + // Share broadcast with the longest vector and extract low subvector (free). + for (SDNode *User : Src->uses()) + if (User != N.getNode() && + (User->getOpcode() == X86ISD::VBROADCAST || + User->getOpcode() == X86ISD::VBROADCAST_LOAD) && + User->getValueSizeInBits(0) > VT.getSizeInBits()) { + return extractSubVector(SDValue(User, 0), 0, DAG, DL, + VT.getSizeInBits()); + } + // vbroadcast(scalarload X) -> vbroadcast_load X - if (!SrcVT.isVector() && Src.hasOneUse() && + // For float loads, extract other uses of the scalar from the broadcast. + if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && ISD::isNormalLoad(Src.getNode())) { LoadSDNode *LN = cast(Src); SDVTList Tys = DAG.getVTList(VT, MVT::Other); @@ -33438,17 +33449,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SDValue BcastLd = DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, LN->getMemoryVT(), LN->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); - return BcastLd; - } - - // Share broadcast with the longest vector and extract low subvector (free). - for (SDNode *User : Src->uses()) - if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && - User->getValueSizeInBits(0) > VT.getSizeInBits()) { - return extractSubVector(SDValue(User, 0), 0, DAG, DL, - VT.getSizeInBits()); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceExtract = Src.hasOneUse(); + DCI.CombineTo(N.getNode(), BcastLd); + if (NoReplaceExtract) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(LN); + } else { + SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd, + DAG.getIntPtrConstant(0, DL)); + DCI.CombineTo(LN, Scl, BcastLd.getValue(1)); } + return N; // Return N so it doesn't get rechecked! 
+ } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll index d3a261e9eb7fb..609c02eee0902 100644 --- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll +++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll @@ -159,18 +159,14 @@ define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone s ; X32: ## %bb.0: ## %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vmovsd %xmm0, (%eax) -; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32-NEXT: vbroadcastsd (%ecx), %ymm0 +; X32-NEXT: vmovlps %xmm0, (%eax) ; X32-NEXT: retl ; ; X64-LABEL: C2: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: vmovsd %xmm0, (%rsi) -; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X64-NEXT: vbroadcastsd (%rdi), %ymm0 +; X64-NEXT: vmovlps %xmm0, (%rsi) ; X64-NEXT: retq entry: %q = load double, double* %ptr, align 8 @@ -231,18 +227,14 @@ define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp ; X32: ## %bb.0: ## %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: vbroadcastss (%ecx), %ymm0 ; X32-NEXT: vmovss %xmm0, (%eax) -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: D3: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vbroadcastss (%rdi), %ymm0 ; X64-NEXT: vmovss %xmm0, (%rsi) -; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq entry: %q = load float, float* %ptr, align 4 @@ -285,16 +277,14 @@ define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp 
; X32: ## %bb.0: ## %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: vbroadcastss (%ecx), %xmm0 ; X32-NEXT: vmovss %xmm0, (%eax) -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X32-NEXT: retl ; ; X64-LABEL: e2: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vbroadcastss (%rdi), %xmm0 ; X64-NEXT: vmovss %xmm0, (%rsi) -; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; X64-NEXT: retq entry: %q = load float, float* %ptr, align 4 @@ -669,16 +659,14 @@ define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone s ; X32: ## %bb.0: ## %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vmovsd %xmm0, (%eax) -; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X32-NEXT: vmovlps %xmm0, (%eax) ; X32-NEXT: retl ; ; X64-LABEL: I2: ; X64: ## %bb.0: ## %entry -; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: vmovsd %xmm0, (%rsi) -; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X64-NEXT: vmovlps %xmm0, (%rsi) ; X64-NEXT: retq entry: %q = load double, double* %ptr, align 4 @@ -884,7 +872,6 @@ define void @broadcast_v16i32(i32* %a, <16 x i32>* %b) { ; ; Broadcast scale factor for xyz vector - slp will have vectorized xy. -; FIXME: Load as a broadcast and then use the scalar 0'th element. 
; define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture readonly) nounwind { ; X32-LABEL: broadcast_scale_xyz: @@ -892,9 +879,8 @@ define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture ; X32-NEXT: subl $12, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0] -; X32-NEXT: vmulpd (%eax), %xmm1, %xmm1 +; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X32-NEXT: vmulpd (%eax), %xmm0, %xmm1 ; X32-NEXT: vmulsd 16(%eax), %xmm0, %xmm0 ; X32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] ; X32-NEXT: vaddsd %xmm2, %xmm1, %xmm1 @@ -906,9 +892,8 @@ define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture ; ; X64-LABEL: broadcast_scale_xyz: ; X64: ## %bb.0: -; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0] -; X64-NEXT: vmulpd (%rsi), %xmm1, %xmm1 +; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X64-NEXT: vmulpd (%rsi), %xmm0, %xmm1 ; X64-NEXT: vmulsd 16(%rsi), %xmm0, %xmm0 ; X64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] ; X64-NEXT: vaddsd %xmm2, %xmm1, %xmm1 From 38d16c15b759c4a7ff4fa85ddff6054f69377459 Mon Sep 17 00:00:00 2001 From: DeForest Richards Date: Sun, 6 Oct 2019 22:49:22 +0000 Subject: [PATCH 078/254] [Docs] Removes Subsystem Documentation page Removes Subsystem Documentation page. Also moves existing topics on Subsystem Documentation page to User Guides and Reference pages. 
llvm-svn: 373872 --- llvm/docs/Reference.rst | 96 +++++++++++++++- llvm/docs/SubsystemDocumentation.rst | 158 --------------------------- llvm/docs/UserGuides.rst | 90 ++++++++++++--- llvm/docs/index.rst | 4 - 4 files changed, 171 insertions(+), 177 deletions(-) delete mode 100644 llvm/docs/SubsystemDocumentation.rst diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index 0e55be3026670..a24a8a1eade5a 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -10,23 +10,47 @@ LLVM and API reference documentation. :hidden: Atomics + BitCodeFormat + BlockFrequencyTerminology + BranchWeightMetadata Bugpoint + CFIVerify CommandGuide/index CompilerWriterInfo + Coroutines + DependenceGraphs/index + ExceptionHandling Extensions + FaultMaps FuzzingLLVM GarbageCollection GetElementPtr + GlobalISel GwpAsan HowToSetUpLLVMStyleRTTI + HowToUseAttributes + InAlloca LangRef LibFuzzer + MarkedUpDisassembly + MemorySSA MIRLangRef OptBisect + ORCv2 PDB/index ScudoHardenedAllocator + SegmentedStacks + StackMaps + SpeculativeLoadHardening + SupportLibrary Statepoints + SystemLibrary TestingGuide + TransformMetadata + TypeMetadata + XRay + XRayExample + XRayFDRFormat YamlIO API Reference @@ -37,6 +61,10 @@ API Reference `Documentation for Go bindings `_ +:doc:`ORCv2` + Describes the design and implementation of the ORC APIs, including some + usage examples, and a guide for users transitioning from ORCv1 to ORCv2. + LLVM Reference -------------- @@ -44,16 +72,32 @@ LLVM Reference Defines the LLVM intermediate representation and the assembly form of the different nodes. +:doc:`FaultMaps` + LLVM support for folding control flow into faulting machine instructions. + +:doc:`InAlloca` + Description of the ``inalloca`` argument attribute. + :doc:`Machine IR (MIR) Format Reference Manual ` A reference manual for the MIR serialization format, which is used to test LLVM's code generation passes. 
+:doc:`GlobalISel` + This describes the prototype instruction selection replacement, GlobalISel. + :doc:`Atomics` Information about LLVM's concurrency model. +:doc:`ExceptionHandling` + This document describes the design and implementation of exception handling + in LLVM. + :doc:`CompilerWriterInfo` A list of helpful links for compiler writers. +:doc:`BitCodeFormat` + This describes the file format and encoding used for LLVM "bc" files. + :doc:`Extensions` LLVM-specific extensions to tools and formats LLVM seeks compatibility with. @@ -61,6 +105,20 @@ LLVM Reference How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your class hierarchy. +:doc:`BlockFrequencyTerminology` + Provides information about terminology used in the ``BlockFrequencyInfo`` + analysis pass. + +:doc:`BranchWeightMetadata` + Provides information about Branch Prediction Information. + +:doc:`MemorySSA` + Information about the MemorySSA utility in LLVM, as well as how to use it. + +:doc:`Support Library ` + This document describes the LLVM Support Library (``lib/Support``) and + how to keep LLVM source code portable + :doc:`GetElementPtr` Answers to some very frequent questions about LLVM's most frequently misunderstood instruction. @@ -71,6 +129,32 @@ LLVM Reference :doc:`GwpAsan` A sampled heap memory error detection toolkit designed for production use. +:doc:`Dependence Graphs ` + A description of the design of the various dependence graphs such as + the DDG (Data Dependence Graph). + +:doc:`CFIVerify` + A description of the verification tool for Control Flow Integrity. + +:doc:`SpeculativeLoadHardening` + A description of the Speculative Load Hardening mitigation for Spectre v1. + +:doc:`SegmentedStacks` + This document describes segmented stacks and how they are used in LLVM. + +:doc:`MarkedUpDisassembly` + This document describes the optional rich disassembly output syntax. + +:doc:`HowToUseAttributes` + Answers some questions about the new Attributes infrastructure. 
+ +:doc:`StackMaps` + LLVM support for mapping instruction addresses to the location of + values and allowing code to be patched. + +:doc:`Coroutines` + LLVM support for coroutines. + :doc:`YamlIO` A reference guide for using LLVM's YAML I/O library. @@ -122,4 +206,14 @@ Testing A reference manual for using the LLVM testing infrastructure. :doc:`TestSuiteGuide` - Describes how to compile and run the test-suite benchmarks. \ No newline at end of file + Describes how to compile and run the test-suite benchmarks. + +==== +XRay +==== + +:doc:`XRay` + High-level documentation of how to use XRay in LLVM. + +:doc:`XRayExample` + An example of how to debug an application with XRay. \ No newline at end of file diff --git a/llvm/docs/SubsystemDocumentation.rst b/llvm/docs/SubsystemDocumentation.rst deleted file mode 100644 index 81feb43f2eeb0..0000000000000 --- a/llvm/docs/SubsystemDocumentation.rst +++ /dev/null @@ -1,158 +0,0 @@ -.. _index-subsystem-docs: - -Subsystem Documentation -======================= - -For API clients and LLVM developers. - -.. toctree:: - :hidden: - - AliasAnalysis - MemorySSA - BitCodeFormat - BlockFrequencyTerminology - BranchWeightMetadata - CodeGenerator - ExceptionHandling - AddingConstrainedIntrinsics - LinkTimeOptimization - SegmentedStacks - TableGenFundamentals - DebuggingJITedCode - GoldPlugin - MarkedUpDisassembly - SystemLibrary - SupportLibrary - SourceLevelDebugging - Vectorizers - HowToUseAttributes - StackMaps - InAlloca - BigEndianNEON - CoverageMappingFormat - MergeFunctions - TypeMetadata - TransformMetadata - FaultMaps - Coroutines - GlobalISel - XRay - XRayExample - XRayFDRFormat - CFIVerify - SpeculativeLoadHardening - StackSafetyAnalysis - LoopTerminology - DependenceGraphs/index - -:doc:`AliasAnalysis` - Information on how to write a new alias analysis implementation or how to - use existing analyses. - -:doc:`MemorySSA` - Information about the MemorySSA utility in LLVM, as well as how to use it. 
- -:doc:`Source Level Debugging with LLVM ` - This document describes the design and philosophy behind the LLVM - source-level debugger. - -:doc:`Vectorizers` - This document describes the current status of vectorization in LLVM. - -:doc:`ExceptionHandling` - This document describes the design and implementation of exception handling - in LLVM. - -:doc:`AddingConstrainedIntrinsics` - Gives the steps necessary when adding a new constrained math intrinsic - to LLVM. - -:doc:`BitCodeFormat` - This describes the file format and encoding used for LLVM "bc" files. - -:doc:`Support Library ` - This document describes the LLVM Support Library (``lib/Support``) and - how to keep LLVM source code portable - -:doc:`LinkTimeOptimization` - This document describes the interface between LLVM intermodular optimizer - and the linker and its design - -:doc:`GoldPlugin` - How to build your programs with link-time optimization on Linux. - -:doc:`DebuggingJITedCode` - How to debug JITed code with GDB. - -:doc:`MCJITDesignAndImplementation` - Describes the inner workings of MCJIT execution engine. - -:doc:`ORCv2` - Describes the design and implementation of the ORC APIs, including some - usage examples, and a guide for users transitioning from ORCv1 to ORCv2. - -:doc:`BranchWeightMetadata` - Provides information about Branch Prediction Information. - -:doc:`BlockFrequencyTerminology` - Provides information about terminology used in the ``BlockFrequencyInfo`` - analysis pass. - -:doc:`SegmentedStacks` - This document describes segmented stacks and how they are used in LLVM. - -:doc:`MarkedUpDisassembly` - This document describes the optional rich disassembly output syntax. - -:doc:`HowToUseAttributes` - Answers some questions about the new Attributes infrastructure. - -:doc:`StackMaps` - LLVM support for mapping instruction addresses to the location of - values and allowing code to be patched. 
- -:doc:`BigEndianNEON` - LLVM's support for generating NEON instructions on big endian ARM targets is - somewhat nonintuitive. This document explains the implementation and rationale. - -:doc:`CoverageMappingFormat` - This describes the format and encoding used for LLVM’s code coverage mapping. - -:doc:`MergeFunctions` - Describes functions merging optimization. - -:doc:`InAlloca` - Description of the ``inalloca`` argument attribute. - -:doc:`FaultMaps` - LLVM support for folding control flow into faulting machine instructions. - -:doc:`CompileCudaWithLLVM` - LLVM support for CUDA. - -:doc:`Coroutines` - LLVM support for coroutines. - -:doc:`GlobalISel` - This describes the prototype instruction selection replacement, GlobalISel. - -:doc:`XRay` - High-level documentation of how to use XRay in LLVM. - -:doc:`XRayExample` - An example of how to debug an application with XRay. - -:doc:`CFIVerify` - A description of the verification tool for Control Flow Integrity. - -:doc:`SpeculativeLoadHardening` - A description of the Speculative Load Hardening mitigation for Spectre v1. - -:doc:`StackSafetyAnalysis` - This document describes the design of the stack safety analysis of local - variables. - -:doc:`Dependence Graphs ` - A description of the design of the various dependence graphs such as - the DDG (Data Dependence Graph). \ No newline at end of file diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index ddb3acf6fbd78..5c035d1717d6a 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -12,33 +12,46 @@ intermediate LLVM representation. .. 
toctree:: :hidden: + AddingConstrainedIntrinsics + AdvancedBuilds + AliasAnalysis + AMDGPUUsage + Benchmarking + BigEndianNEON + BuildingADistribution CMake CMakePrimer - AdvancedBuilds + CodeGenerator + CodeOfConduct + CommandLine + CompileCudaWithLLVM + CoverageMappingFormat + DebuggingJITedCode + Docker + ExtendingLLVM + GoldPlugin HowToBuildOnARM HowToBuildWithPGO HowToCrossCompileBuiltinsOnArm HowToCrossCompileLLVM - yaml2obj + LinkTimeOptimization + LoopTerminology MarkdownQuickstartTemplate + MergeFunctions + MCJITDesignAndImplementation + NVPTXUsage Phabricator Passes - MCJITDesignAndImplementation - ORCv2 - CodeOfConduct - CompileCudaWithLLVM ReportingGuide - Benchmarking - Docker - BuildingADistribution Remarks + StackSafetyAnalysis + SourceLevelDebugging + TableGen/index + TableGenFundamentals + Vectorizers WritingAnLLVMPass WritingAnLLVMBackend - TableGen/index - NVPTXUsage - AMDGPUUsage - ExtendingLLVM - CommandLine + yaml2obj Clang ----- @@ -57,6 +70,9 @@ Clang .. __: http://clang.llvm.org/get_started.html +:doc:`CoverageMappingFormat` + This describes the format and encoding used for LLVM’s code coverage mapping. + LLVM Builds and Distributions ----------------------------- @@ -80,12 +96,37 @@ Optimizations :doc:`Passes` A list of optimizations and analyses implemented in LLVM. +:doc:`StackSafetyAnalysis` + This document describes the design of the stack safety analysis of local + variables. + +:doc:`MergeFunctions` + Describes functions merging optimization. + +:doc:`AliasAnalysis` + Information on how to write a new alias analysis implementation or how to + use existing analyses. + :doc:`LoopTerminology` A document describing Loops and associated terms as used in LLVM. +:doc:`Vectorizers` + This document describes the current status of vectorization in LLVM. 
+ +:doc:`LinkTimeOptimization` + This document describes the interface between LLVM intermodular optimizer + and the linker and its design + +:doc:`GoldPlugin` + How to build your programs with link-time optimization on Linux. + :doc:`Remarks` A reference on the implementation of remarks in LLVM. +:doc:`Source Level Debugging with LLVM ` + This document describes the design and philosophy behind the LLVM + source-level debugger. + Code Generation --------------- @@ -101,6 +142,16 @@ Code Generation Describes the TableGen tool, which is used heavily by the LLVM code generator. +=== +JIT +=== + +:doc:`MCJITDesignAndImplementation` + Describes the inner workings of MCJIT execution engine. + +:doc:`DebuggingJITedCode` + How to debug JITed code with GDB. + Additional Topics ----------------- @@ -110,9 +161,20 @@ Additional Topics :doc:`ExtendingLLVM` Look here to see how to add instructions and intrinsics to LLVM. +:doc:`AddingConstrainedIntrinsics` + Gives the steps necessary when adding a new constrained math intrinsic + to LLVM. + :doc:`HowToCrossCompileBuiltinsOnArm` Notes on cross-building and testing the compiler-rt builtins for Arm. +:doc:`BigEndianNEON` + LLVM's support for generating NEON instructions on big endian ARM targets is + somewhat nonintuitive. This document explains the implementation and rationale. + +:doc:`CompileCudaWithLLVM` + LLVM support for CUDA. + :doc:`NVPTXUsage` This document describes using the NVPTX backend to compile GPU kernels. diff --git a/llvm/docs/index.rst b/llvm/docs/index.rst index c5ddabfe0404e..531616d69129c 100644 --- a/llvm/docs/index.rst +++ b/llvm/docs/index.rst @@ -55,7 +55,6 @@ Getting Started, How-tos, Developer Guides, and Tutorials. GettingStartedTutorials Reference - SubsystemDocumentation UserGuides :doc:`GettingStartedTutorials` @@ -64,9 +63,6 @@ Getting Started, How-tos, Developer Guides, and Tutorials. :doc:`UserGuides` User guides and How-tos. -:doc:`SubsystemDocumentation` - For API clients and LLVM developers. 
- :doc:`Reference` LLVM and API reference documentation. From a30730f6904916e8c97f6fd934bf493e999cb1e4 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 00:37:10 +0000 Subject: [PATCH 079/254] gn build: no-op style tweak in sync script llvm-svn: 373873 --- llvm/utils/gn/build/sync_source_lists_from_cmake.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/build/sync_source_lists_from_cmake.py b/llvm/utils/gn/build/sync_source_lists_from_cmake.py index 5063f71a826ba..4e64b0cac8790 100755 --- a/llvm/utils/gn/build/sync_source_lists_from_cmake.py +++ b/llvm/utils/gn/build/sync_source_lists_from_cmake.py @@ -155,6 +155,5 @@ def main(): sys.exit(1) - if __name__ == '__main__': main() From 74ce7112c3fccccfa7edb134c0a2d8fe2aab462f Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 7 Oct 2019 02:45:12 +0000 Subject: [PATCH 080/254] Fix behavior of __builtin_bit_cast when the From and To types are the same. We were missing the lvalue-to-rvalue conversion entirely in this case, and in fact still need the full CK_LValueToRValueBitCast conversion to perform a load with no TBAA. llvm-svn: 373874 --- clang/include/clang/AST/OperationKinds.def | 5 +++-- clang/lib/Sema/SemaCast.cpp | 5 ----- .../test/CodeGenCXX/builtin-bit-cast-no-tbaa.cpp | 5 +++++ .../test/SemaCXX/constexpr-builtin-bit-cast.cpp | 16 ++++++++++++++++ 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index 9af92c1ae7ff7..f29664e8eb338 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -66,8 +66,9 @@ CAST_OPERATION(BitCast) /// bool b; reinterpret_cast(b) = 'a'; CAST_OPERATION(LValueBitCast) -/// CK_LValueToRValueBitCast - A conversion that causes us to reinterpret an -/// lvalue as an rvalue of a different type. Created by __builtin_bit_cast. 
+/// CK_LValueToRValueBitCast - A conversion that causes us to reinterpret the +/// object representation of an lvalue as an rvalue. Created by +/// __builtin_bit_cast. CAST_OPERATION(LValueToRValueBitCast) /// CK_LValueToRValue - A conversion which causes the extraction of diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 71e5e8e4286f7..8c6abc448d977 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2835,11 +2835,6 @@ void CastOperation::CheckBuiltinBitCast() { return; } - if (Self.Context.hasSameUnqualifiedType(DestType, SrcType)) { - Kind = CK_NoOp; - return; - } - Kind = CK_LValueToRValueBitCast; } diff --git a/clang/test/CodeGenCXX/builtin-bit-cast-no-tbaa.cpp b/clang/test/CodeGenCXX/builtin-bit-cast-no-tbaa.cpp index 3f0e490a5f94b..b26e519bee3d4 100644 --- a/clang/test/CodeGenCXX/builtin-bit-cast-no-tbaa.cpp +++ b/clang/test/CodeGenCXX/builtin-bit-cast-no-tbaa.cpp @@ -15,5 +15,10 @@ void test_scalar2() { // CHECK: load i32, i32* {{.*}}, align 4, !tbaa ![[MAY_ALIAS_TBAA]] } +int test_same_type(int &r) { + // CHECK: load i32, i32* {{.*}}, align 4, !tbaa ![[MAY_ALIAS_TBAA]] + return __builtin_bit_cast(int, r); +} + // CHECK: ![[CHAR_TBAA:.*]] = !{!"omnipotent char", {{.*}}, i64 0} // CHECK: ![[MAY_ALIAS_TBAA]] = !{![[CHAR_TBAA]], ![[CHAR_TBAA]], i64 0} diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp index 0a12e7eebe45e..06771f8f3252a 100644 --- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp +++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp @@ -381,3 +381,19 @@ constexpr bool test_pad_buffer() { return x.a == z.a && x.b == z.b; } static_assert(test_pad_buffer()); + +constexpr unsigned char identity1a = 42; +constexpr unsigned char identity1b = __builtin_bit_cast(unsigned char, identity1a); +static_assert(identity1b == 42); + +struct IdentityInStruct { + unsigned char n; +}; +constexpr IdentityInStruct identity2a = {42}; +constexpr 
unsigned char identity2b = __builtin_bit_cast(unsigned char, identity2a.n); + +union IdentityInUnion { + unsigned char n; +}; +constexpr IdentityInUnion identity3a = {42}; +constexpr unsigned char identity3b = __builtin_bit_cast(unsigned char, identity3a.n); From df3761f6dcdac76efcdcdc85d4011f0e716e8baa Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 7 Oct 2019 03:14:28 +0000 Subject: [PATCH 081/254] [c++20] Check for a class-specific operator delete when deleting an object of class type with a virtual destructor. llvm-svn: 373875 --- clang/lib/AST/ExprConstant.cpp | 19 +++++++++++++++++++ .../SemaCXX/constant-expression-cxx2a.cpp | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8c56a3cc5504d..c32f516aec7e8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -6019,6 +6019,13 @@ static bool hasVirtualDestructor(QualType T) { return false; } +static const FunctionDecl *getVirtualOperatorDelete(QualType T) { + if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) + if (CXXDestructorDecl *DD = RD->getDestructor()) + return DD->isVirtual() ? DD->getOperatorDelete() : nullptr; + return nullptr; +} + /// Check that the given object is a suitable pointer to a heap allocation that /// still exists and is of the right kind for the purpose of a deletion. /// @@ -13208,6 +13215,18 @@ bool VoidExprEvaluator::VisitCXXDeleteExpr(const CXXDeleteExpr *E) { return false; } + // For a class type with a virtual destructor, the selected operator delete + // is the one looked up when building the destructor. 
+ if (!E->isArrayForm() && !E->isGlobalDelete()) { + const FunctionDecl *VirtualDelete = getVirtualOperatorDelete(AllocType); + if (VirtualDelete && + !VirtualDelete->isReplaceableGlobalAllocationFunction()) { + Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) + << isa(VirtualDelete) << VirtualDelete; + return false; + } + } + if (!HandleDestruction(Info, E->getExprLoc(), Pointer.getLValueBase(), (*Alloc)->Value, AllocType)) return false; diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp index cb8f16a8b0f57..7c1718cebe44a 100644 --- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp @@ -1190,6 +1190,25 @@ namespace dtor_call { static_assert(virt_dtor(3, "YX")); static_assert(virt_dtor(4, "X")); + constexpr bool virt_delete(bool global) { + struct A { + virtual constexpr ~A() {} + }; + struct B : A { + void operator delete(void *); + constexpr ~B() {} + }; + + A *p = new B; + if (global) + ::delete p; + else + delete p; // expected-note {{call to class-specific 'operator delete'}} + return true; + } + static_assert(virt_delete(true)); + static_assert(virt_delete(false)); // expected-error {{}} expected-note {{in call}} + constexpr void use_after_virt_destroy() { char buff[4] = {}; VU vu; From 6088f84398847152ad97eb1bc0b139a28e879b48 Mon Sep 17 00:00:00 2001 From: Yi-Hong Lyu Date: Mon, 7 Oct 2019 05:29:11 +0000 Subject: [PATCH 082/254] [NFC][CGP] Tests for making ICMP_EQ use CR result of ICMP_S(L|G)T dominators llvm-svn: 373876 --- .../AArch64/use-cr-result-of-dom-icmp-st.ll | 547 +++++++++++++++ .../PowerPC/use-cr-result-of-dom-icmp-st.ll | 492 +++++++------- .../X86/use-cr-result-of-dom-icmp-st.ll | 627 ++++++++++++++++++ 3 files changed, 1434 insertions(+), 232 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll create mode 100644 llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll diff --git 
a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll new file mode 100644 index 0000000000000..31cc59c10b9c7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll @@ -0,0 +1,547 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-unknown -O3 -verify-machineinstrs < %s | FileCheck %s + +; Test cases are generated from: +; long long NAME(PARAM a, PARAM b) { +; if (LHS > RHS) +; return b; +; if (LHS < RHS) +; return a;\ +; return a * b; +; } +; Please note funtion name is defined as __. Take ll_a_op_b__1 +; for example. ll is PARAM, a_op_b (i.e., a << b) is LHS, _1 (i.e., -1) is RHS. + +target datalayout = "e-m:e-i64:64-n32:64" + +define i64 @ll_a_op_b__2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b__2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: cmn x8, #2 // =2 +; CHECK-NEXT: b.le .LBB0_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, -2 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b__1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b__1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tbnz x8, #63, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %if.end +; CHECK-NEXT: cmn x8, #1 // =1 +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, -1 + br i1 %cmp, label %return, 
label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, -1 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_0(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: b.le .LBB2_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, 0 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: b.le .LBB3_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, 1 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: cmp x8, #2 // =2 +; CHECK-NEXT: b.le .LBB4_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, 
x0 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, 2 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a__2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a__2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x0, #2 // =2 +; CHECK-NEXT: b.le .LBB5_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i64 %a, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, -2 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a__1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a__1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbnz x0, #63, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %if.end +; CHECK-NEXT: cmn x0, #1 // =1 +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i64 %a, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, -1 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_0(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp x0, #0 // =0 +; CHECK-NEXT: b.le .LBB7_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %cmp = icmp 
sgt i64 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 0 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp x0, #1 // =1 +; CHECK-NEXT: b.le .LBB8_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i64 %a, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 1 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp x0, #2 // =2 +; CHECK-NEXT: b.le .LBB9_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: mul x0, x8, x0 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i64 %a, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 2 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @i_a_op_b__2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b__2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: cmn w8, #2 // =2 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + 
%cmp2 = icmp eq i32 %shl, -2 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b__1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b__1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: cmn w8, #1 // =1 +; CHECK-NEXT: csinc w9, w1, wzr, eq +; CHECK-NEXT: mul w9, w9, w0 +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: csel w8, w1, w9, ge +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, -1 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 0 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl w8, w0, w1 +; 
CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 1 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 2 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a__2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a__2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, #2 // =2 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i32 %a, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, -2 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + 
%retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a__1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a__1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn w0, #1 // =1 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csel w8, w1, w8, ge +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i32 %a, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, -1 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_0(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, #0 // =0 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 0 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, #1 // =1 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i32 %a, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 1 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return 
+ +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp w0, #2 // =2 +; CHECK-NEXT: csinc w8, w1, wzr, eq +; CHECK-NEXT: mul w8, w8, w0 +; CHECK-NEXT: csel w8, w1, w8, gt +; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i32 %a, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 2 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} diff --git a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll index 7839669bea9b1..3a41db0cb9809 100644 --- a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 < %s | FileCheck %s -check-prefix=PPC64LE +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s ; Test cases are generated from: ; long long NAME(PARAM a, PARAM b) { @@ -13,22 +13,21 @@ ; for example. ll is PARAM, a_op_b (i.e., a << b) is LHS, _1 (i.e., -1) is RHS. 
target datalayout = "e-m:e-i64:64-n32:64" -target triple = "powerpc64le-unknown-linux-gnu" define i64 @ll_a_op_b__2(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_op_b__2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: sld 5, 3, 4 -; PPC64LE-NEXT: cmpdi 5, -2 -; PPC64LE-NEXT: ble 0, .LBB0_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB0_2: # %if.end -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_op_b__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sld r5, r3, r4 +; CHECK-NEXT: cmpdi r5, -2 +; CHECK-NEXT: ble cr0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %shl = shl i64 %a, %b %cmp = icmp sgt i64 %shl, -2 @@ -45,19 +44,19 @@ return: ; preds = %entry } define i64 @ll_a_op_b__1(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_op_b__1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: sld 5, 3, 4 -; PPC64LE-NEXT: cmpdi 5, -1 -; PPC64LE-NEXT: ble 0, .LBB1_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB1_2: # %if.end -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_op_b__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sld r5, r3, r4 +; CHECK-NEXT: cmpdi r5, -1 +; CHECK-NEXT: ble cr0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %shl = shl i64 %a, %b %cmp = icmp sgt i64 %shl, -1 @@ -74,19 +73,19 @@ return: ; preds = %entry } define i64 @ll_a_op_b_0(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_op_b_0: -; PPC64LE: # %bb.0: # %entry -; 
PPC64LE-NEXT: sld. 5, 3, 4 -; PPC64LE-NEXT: ble 0, .LBB2_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB2_2: # %if.end -; PPC64LE-NEXT: cmpldi 5, 0 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_op_b_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sld. r5, r3, r4 +; CHECK-NEXT: ble cr0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_2: # %if.end +; CHECK-NEXT: cmpldi r5, 0 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %shl = shl i64 %a, %b %cmp = icmp sgt i64 %shl, 0 @@ -103,20 +102,20 @@ return: ; preds = %entry } define i64 @ll_a_op_b_1(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_op_b_1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: sld 5, 3, 4 -; PPC64LE-NEXT: cmpdi 5, 1 -; PPC64LE-NEXT: ble 0, .LBB3_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB3_2: # %if.end -; PPC64LE-NEXT: cmpldi 5, 1 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_op_b_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sld r5, r3, r4 +; CHECK-NEXT: cmpdi r5, 1 +; CHECK-NEXT: ble cr0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_2: # %if.end +; CHECK-NEXT: cmpldi r5, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %shl = shl i64 %a, %b %cmp = icmp sgt i64 %shl, 1 @@ -133,20 +132,20 @@ return: ; preds = %entry } define i64 @ll_a_op_b_2(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_op_b_2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: sld 5, 3, 4 -; PPC64LE-NEXT: cmpdi 5, 2 -; PPC64LE-NEXT: ble 0, .LBB4_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; 
PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB4_2: # %if.end -; PPC64LE-NEXT: cmpldi 5, 2 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_op_b_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sld r5, r3, r4 +; CHECK-NEXT: cmpdi r5, 2 +; CHECK-NEXT: ble cr0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_2: # %if.end +; CHECK-NEXT: cmpldi r5, 2 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %shl = shl i64 %a, %b %cmp = icmp sgt i64 %shl, 2 @@ -163,18 +162,18 @@ return: ; preds = %entry } define i64 @ll_a__2(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a__2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: cmpdi 3, -2 -; PPC64LE-NEXT: ble 0, .LBB5_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB5_2: # %if.end -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpdi r3, -2 +; CHECK-NEXT: ble cr0, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, -2 br i1 %cmp, label %return, label %if.end @@ -190,18 +189,18 @@ return: ; preds = %entry } define i64 @ll_a__1(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a__1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: cmpdi 3, -1 -; PPC64LE-NEXT: ble 0, .LBB6_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB6_2: # %if.end -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpdi r3, 
-1 +; CHECK-NEXT: ble cr0, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, -1 br i1 %cmp, label %return, label %if.end @@ -217,19 +216,19 @@ return: ; preds = %entry } define i64 @ll_a_0(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_0: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: cmpdi 3, 0 -; PPC64LE-NEXT: ble 0, .LBB7_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB7_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 0 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpdi r3, 0 +; CHECK-NEXT: ble cr0, .LBB7_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB7_2: # %if.end +; CHECK-NEXT: cmpldi r3, 0 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, 0 br i1 %cmp, label %return, label %if.end @@ -245,19 +244,19 @@ return: ; preds = %entry } define i64 @ll_a_1(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: cmpdi 3, 1 -; PPC64LE-NEXT: ble 0, .LBB8_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB8_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 1 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpdi r3, 1 +; CHECK-NEXT: ble cr0, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB8_2: # %if.end +; CHECK-NEXT: cmpldi r3, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; 
CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, 1 br i1 %cmp, label %return, label %if.end @@ -273,19 +272,19 @@ return: ; preds = %entry } define i64 @ll_a_2(i64 %a, i64 %b) { -; PPC64LE-LABEL: ll_a_2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: cmpdi 3, 2 -; PPC64LE-NEXT: ble 0, .LBB9_2 -; PPC64LE-NEXT: # %bb.1: # %return -; PPC64LE-NEXT: mr 3, 4 -; PPC64LE-NEXT: blr -; PPC64LE-NEXT: .LBB9_2: # %if.end -; PPC64LE-NEXT: cmpldi 3, 2 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: isel 4, 4, 5, 2 -; PPC64LE-NEXT: mulld 3, 4, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: ll_a_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpdi r3, 2 +; CHECK-NEXT: ble cr0, .LBB9_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB9_2: # %if.end +; CHECK-NEXT: cmpldi r3, 2 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mulld r3, r4, r3 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i64 %a, 2 br i1 %cmp, label %return, label %if.end @@ -301,16 +300,18 @@ return: ; preds = %entry } define i64 @i_a_op_b__2(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_op_b__2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: slw 6, 3, 4 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmpwi 6, -2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_op_b__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slw r5, r3, r4 +; CHECK-NEXT: cmpwi r5, -2 +; CHECK-NEXT: bgt cr0, .LBB10_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB10_2: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %shl = shl i32 %a, %b %cmp = icmp sgt i32 %shl, -2 @@ -329,16 +330,20 @@ return: ; preds = %if.end, %entry } define i64 @i_a_op_b__1(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_op_b__1: -; PPC64LE: # %bb.0: # 
%entry -; PPC64LE-NEXT: slw 6, 3, 4 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmpwi 6, -1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_op_b__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slw r5, r3, r4 +; CHECK-NEXT: cmpwi r5, -1 +; CHECK-NEXT: ble cr0, .LBB11_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB11_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %shl = shl i32 %a, %b %cmp = icmp sgt i32 %shl, -1 @@ -357,16 +362,21 @@ return: ; preds = %if.end, %entry } define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_op_b_0: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: slw. 5, 3, 4 -; PPC64LE-NEXT: li 6, 1 -; PPC64LE-NEXT: isel 6, 4, 6, 2 -; PPC64LE-NEXT: cmpwi 5, 0 -; PPC64LE-NEXT: mullw 3, 6, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_op_b_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slw r5, r3, r4 +; CHECK-NEXT: cmpwi r5, 0 +; CHECK-NEXT: ble cr0, .LBB12_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB12_2: # %if.end +; CHECK-NEXT: cmplwi r5, 0 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %shl = shl i32 %a, %b %cmp = icmp sgt i32 %shl, 0 @@ -385,17 +395,19 @@ return: ; preds = %if.end, %entry } define i64 @i_a_op_b_1(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_op_b_1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: slw 6, 3, 4 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 6, 1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 6, 1 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; 
PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_op_b_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slw r5, r3, r4 +; CHECK-NEXT: cmpwi r5, 1 +; CHECK-NEXT: bgt cr0, .LBB13_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: cmplwi r5, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB13_2: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %shl = shl i32 %a, %b %cmp = icmp sgt i32 %shl, 1 @@ -414,17 +426,19 @@ return: ; preds = %if.end, %entry } define i64 @i_a_op_b_2(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_op_b_2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: slw 6, 3, 4 -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 6, 2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 6, 2 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_op_b_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slw r5, r3, r4 +; CHECK-NEXT: cmpwi r5, 2 +; CHECK-NEXT: bgt cr0, .LBB14_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: cmplwi r5, 2 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB14_2: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %shl = shl i32 %a, %b %cmp = icmp sgt i32 %shl, 2 @@ -443,15 +457,17 @@ return: ; preds = %if.end, %entry } define i64 @i_a__2(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a__2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmpwi 3, -2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, -2 +; CHECK-NEXT: bgt cr0, .LBB15_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB15_2: # %return +; CHECK-NEXT: extsw r3, r4 +; 
CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %a, -2 br i1 %cmp, label %return, label %if.end @@ -469,15 +485,19 @@ return: ; preds = %if.end, %entry } define i64 @i_a__1(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a__1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmpwi 3, -1 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: mullw 3, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 3, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, -1 +; CHECK-NEXT: ble cr0, .LBB16_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB16_2: # %if.end +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %a, -1 br i1 %cmp, label %return, label %if.end @@ -495,16 +515,20 @@ return: ; preds = %if.end, %entry } define i64 @i_a_0(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_0: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 0 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 0 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 0 +; CHECK-NEXT: ble cr0, .LBB17_2 +; CHECK-NEXT: # %bb.1: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB17_2: # %if.end +; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %return, label %if.end @@ -522,16 +546,18 @@ return: ; preds = %if.end, %entry } define i64 @i_a_1(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_1: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 1 -; PPC64LE-NEXT: 
isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 1 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 1 +; CHECK-NEXT: bgt cr0, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: cmplwi r3, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB18_2: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %a, 1 br i1 %cmp, label %return, label %if.end @@ -549,16 +575,18 @@ return: ; preds = %if.end, %entry } define i64 @i_a_2(i32 signext %a, i32 signext %b) { -; PPC64LE-LABEL: i_a_2: -; PPC64LE: # %bb.0: # %entry -; PPC64LE-NEXT: li 5, 1 -; PPC64LE-NEXT: cmplwi 3, 2 -; PPC64LE-NEXT: isel 5, 4, 5, 2 -; PPC64LE-NEXT: cmpwi 0, 3, 2 -; PPC64LE-NEXT: mullw 5, 5, 3 -; PPC64LE-NEXT: isel 3, 4, 5, 1 -; PPC64LE-NEXT: extsw 3, 3 -; PPC64LE-NEXT: blr +; CHECK-LABEL: i_a_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpwi r3, 2 +; CHECK-NEXT: bgt cr0, .LBB19_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: cmplwi r3, 2 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: mullw r4, r4, r3 +; CHECK-NEXT: .LBB19_2: # %return +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr entry: %cmp = icmp sgt i32 %a, 2 br i1 %cmp, label %return, label %if.end diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll new file mode 100644 index 0000000000000..adb9eb2d49c29 --- /dev/null +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -O3 -verify-machineinstrs < %s | FileCheck %s + +; Test cases are generated from: +; long long NAME(PARAM a, PARAM b) { +; if (LHS > RHS) +; return b; +; if (LHS < RHS) +; return a;\ +; return a * b; +; } +; 
Please note funtion name is defined as __. Take ll_a_op_b__1 +; for example. ll is PARAM, a_op_b (i.e., a << b) is LHS, _1 (i.e., -1) is RHS. + +target datalayout = "e-m:e-i64:64-n32:64" + +define i64 @ll_a_op_b__2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq %cl, %rax +; CHECK-NEXT: cmpq $-2, %rax +; CHECK-NEXT: jle .LBB0_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, -2 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b__1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq %cl, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: js .LBB1_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_1: # %if.end +; CHECK-NEXT: cmpq $-1, %rax +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, -1 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_0(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq %cl, 
%rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jle .LBB2_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, 0 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq %cl, %rax +; CHECK-NEXT: cmpq $1, %rax +; CHECK-NEXT: jle .LBB3_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB3_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i64 %shl, 1 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_op_b_2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_op_b_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq %cl, %rax +; CHECK-NEXT: cmpq $2, %rax +; CHECK-NEXT: jle .LBB4_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB4_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i64 %a, %b + %cmp = icmp sgt i64 %shl, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; 
preds = %entry + %cmp2 = icmp eq i64 %shl, 2 + %mul = select i1 %cmp2, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a__2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpq $-2, %rdi +; CHECK-NEXT: jle .LBB5_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB5_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i64 %a, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, -2 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a__1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: js .LBB6_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: # %if.end +; CHECK-NEXT: cmpq $-1, %rdi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i64 %a, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, -1 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_0(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: jle .LBB7_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB7_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i64 %a, 0 + br i1 %cmp, label 
%return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 0 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_1(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: jle .LBB8_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB8_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i64 %a, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 1 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @ll_a_2(i64 %a, i64 %b) { +; CHECK-LABEL: ll_a_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpq $2, %rdi +; CHECK-NEXT: jle .LBB9_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB9_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i64 %a, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i64 %a, 2 + %mul = select i1 %cmp1, i64 %b, i64 1 + %spec.select = mul nsw i64 %mul, %a + ret i64 %spec.select + +return: ; preds = %entry + ret i64 %b +} + +define i64 @i_a_op_b__2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: cmpl $-2, %eax +; CHECK-NEXT: jg .LBB10_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: .LBB10_2: # 
%return +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, -2 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b__1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: js .LBB11_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB11_1: # %if.end +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, -1 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jle .LBB12_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB12_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: imull 
%edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 0 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: jg .LBB13_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: .LBB13_2: # %return +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl i32 %a, %b + %cmp = icmp sgt i32 %shl, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 1 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_op_b_2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_op_b_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: jg .LBB14_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: .LBB14_2: # %return +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: retq +entry: + %shl = shl 
i32 %a, %b + %cmp = icmp sgt i32 %shl, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp2 = icmp eq i32 %shl, 2 + %mul = select i1 %cmp2, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a__2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a__2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $-2, %edi +; CHECK-NEXT: jg .LBB15_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %esi, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: .LBB15_2: # %return +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i32 %a, -2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, -2 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a__1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a__1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: js .LBB16_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB16_1: # %if.end +; CHECK-NEXT: cmpl $-1, %edi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %esi, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i32 %a, -1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, -1 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + 
%retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_0(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB17_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB17_1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %esi, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 0 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_1(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: jg .LBB18_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %esi, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: .LBB18_2: # %return +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i32 %a, 1 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 1 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} + +define i64 @i_a_2(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: i_a_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $2, %edi +; CHECK-NEXT: jg .LBB19_2 +; CHECK-NEXT: # %bb.1: # 
%if.end +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovel %esi, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: .LBB19_2: # %return +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: retq +entry: + %cmp = icmp sgt i32 %a, 2 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp1 = icmp eq i32 %a, 2 + %mul = select i1 %cmp1, i32 %b, i32 1 + %spec.select = mul nsw i32 %mul, %a + br label %return + +return: ; preds = %if.end, %entry + %retval.0.in = phi i32 [ %b, %entry ], [ %spec.select, %if.end ] + %retval.0 = sext i32 %retval.0.in to i64 + ret i64 %retval.0 +} From 2c4f0788776979be4a074d813069bfac289cb7cf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Oct 2019 06:27:55 +0000 Subject: [PATCH 083/254] [X86] Support LEA64_32r in processInstrForSlow3OpLEA and use INC/DEC when possible. Move the erasing and iterator updating inside to match the other slow LEA function. I've adapted code from optTwoAddrLEA and basically rebuilt the implementation here. We do lose the kill flags now just like optTwoAddrLEA. This runs late enough in the pipeline that shouldn't really be a problem. llvm-svn: 373877 --- llvm/lib/Target/X86/X86FixupLEAs.cpp | 190 +++++++++++++--------- llvm/test/CodeGen/X86/leaFixup32.mir | 6 +- llvm/test/CodeGen/X86/leaFixup64.mir | 20 +-- llvm/test/CodeGen/X86/select-1-or-neg1.ll | 4 +- 4 files changed, 125 insertions(+), 95 deletions(-) diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp index f66c6eb4ec160..543dc8b00fa05 100644 --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass { /// - LEA that uses RIP relative addressing mode /// - LEA that uses 16-bit addressing mode " /// This function currently handles the first 2 cases only. 
- MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB); + void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, bool OptIncDec); /// Look for LEAs that are really two address LEAs that we might be able to /// turn into regular ADD instructions. @@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP)) continue; - if (IsSlowLEA) { + if (IsSlowLEA) processInstructionForSlowLEA(I, MBB); - } else if (IsSlow3OpsLEA) { - if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) { - MBB.erase(I); - I = NewMI; - } - } + else if (IsSlow3OpsLEA) + processInstrForSlow3OpLEA(I, MBB, OptIncDec); } // Second pass for creating LEAs. This may reverse some of the @@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) { Reg == X86::R13D || Reg == X86::R13; } -static inline bool isRegOperand(const MachineOperand &Op) { - return Op.isReg() && Op.getReg() != X86::NoRegister; -} - /// Returns true if this LEA uses base an index registers, and the base register /// is known to be inefficient for the subtarget. // TODO: use a variant scheduling class to model the latency profile // of LEA instructions, and implement this logic as a scheduling predicate. 
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, const MachineOperand &Index) { - return Base.isReg() && isInefficientLEAReg(Base.getReg()) && - isRegOperand(Index); + return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && + Index.getReg() != X86::NoRegister; } static inline bool hasLEAOffset(const MachineOperand &Offset) { @@ -534,112 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, } } -MachineInstr * -FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB) { +void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, + bool OptIncDec) { + MachineInstr &MI = *I; const unsigned LEAOpcode = MI.getOpcode(); - const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); - if (!(TII->isThreeOperandsLEA(MI) || - hasInefficientLEABaseReg(Base, Index)) || + if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || !TII->isSafeToClobberEFLAGS(MBB, MI) || Segment.getReg() != X86::NoRegister) - return nullptr; + return; + + Register DestReg = Dest.getReg(); + Register BaseReg = Base.getReg(); + Register IndexReg = Index.getReg(); + + if (MI.getOpcode() == X86::LEA64_32r) { + if (BaseReg != 0) + BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); + if (IndexReg != 0) + IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit); + } - Register DstR = Dst.getReg(); - Register BaseR = Base.getReg(); - Register IndexR = Index.getReg(); - Register SSDstR = - (LEAOpcode == X86::LEA64_32r) ? 
Register(getX86SubSuperRegister(DstR, 64)) - : DstR; bool IsScale1 = Scale.getImm() == 1; - bool IsInefficientBase = isInefficientLEAReg(BaseR); - bool IsInefficientIndex = isInefficientLEAReg(IndexR); + bool IsInefficientBase = isInefficientLEAReg(BaseReg); + bool IsInefficientIndex = isInefficientLEAReg(IndexReg); // Skip these cases since it takes more than 2 instructions // to replace the LEA instruction. - if (IsInefficientBase && SSDstR == BaseR && !IsScale1) - return nullptr; - if (LEAOpcode == X86::LEA64_32r && IsInefficientBase && - (IsInefficientIndex || !IsScale1)) - return nullptr; - - const DebugLoc DL = MI.getDebugLoc(); - const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); - const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); + if (IsInefficientBase && DestReg == BaseReg && !IsScale1) + return; LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); + MachineInstr *NewMI = nullptr; + // First try to replace LEA with one or two (for the 3-op LEA case) // add instructions: // 1.lea (%base,%index,1), %base => add %index,%base // 2.lea (%base,%index,1), %index => add %base,%index - if (IsScale1 && (DstR == BaseR || DstR == IndexR)) { - const MachineOperand &Src = DstR == BaseR ? Index : Base; - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); - LLVM_DEBUG(NewMI->dump();); - // Create ADD instruction for the Offset in case of 3-Ops LEA. - if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); + if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) { + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + if (DestReg != BaseReg) + std::swap(BaseReg, IndexReg); + + if (MI.getOpcode() == X86::LEA64_32r) { + // TODO: Do we need the super register implicit use? 
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg) + .addReg(Base.getReg(), RegState::Implicit) + .addReg(Index.getReg(), RegState::Implicit); + } else { + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg); } - return NewMI; - } - // If the base is inefficient try switching the index and base operands, - // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: - // lea offset(%base,%index,scale),%dst => - // lea (%base,%index,scale); add offset,%dst - if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .add(IsInefficientBase ? Index : Base) - .add(Scale) - .add(IsInefficientBase ? Base : Index) - .addImm(0) - .add(Segment); + } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { + // If the base is inefficient try switching the index and base operands, + // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: + // lea offset(%base,%index,scale),%dst => + // lea (%base,%index,scale); add offset,%dst + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .add(IsInefficientBase ? Index : Base) + .add(Scale) + .add(IsInefficientBase ? Base : Index) + .addImm(0) + .add(Segment); LLVM_DEBUG(NewMI->dump();); + } + + // If either replacement succeeded above, add the offset if needed, then + // replace the instruction. + if (NewMI) { // Create ADD instruction for the Offset in case of 3-Ops LEA. 
if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); + if (OptIncDec && Offset.isImm() && + (Offset.getImm() == 1 || Offset.getImm() == -1)) { + unsigned NewOpc = + getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg); + LLVM_DEBUG(NewMI->dump();); + } else { + unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Offset); + LLVM_DEBUG(NewMI->dump();); + } } - return NewMI; + + MBB.erase(I); + I = NewMI; + return; } + // Handle the rest of the cases with inefficient base register: - assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); + assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!"); assert(IsInefficientBase && "efficient base should be handled already!"); + // FIXME: Handle LEA64_32r. 
+ if (LEAOpcode == X86::LEA64_32r) + return; + // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst if (IsScale1 && !hasLEAOffset(Offset)) { - bool BIK = Base.isKill() && BaseR != IndexR; - TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK); + bool BIK = Base.isKill() && BaseReg != IndexReg; + TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); LLVM_DEBUG(MI.getPrevNode()->dump();); - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Index); LLVM_DEBUG(NewMI->dump();); - return NewMI; + return; } + // lea offset(%base,%index,scale), %dst => // lea offset( ,%index,scale), %dst; add %base,%dst - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .addReg(0) - .add(Scale) - .add(Index) - .add(Offset) - .add(Segment); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .addReg(0) + .add(Scale) + .add(Index) + .add(Offset) + .add(Segment); LLVM_DEBUG(NewMI->dump();); - NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Base); LLVM_DEBUG(NewMI->dump();); - return NewMI; + + MBB.erase(I); + I = NewMI; } diff --git a/llvm/test/CodeGen/X86/leaFixup32.mir b/llvm/test/CodeGen/X86/leaFixup32.mir index f614a4ad975e3..ede0df7c77d8e 100644 --- a/llvm/test/CodeGen/X86/leaFixup32.mir +++ b/llvm/test/CodeGen/X86/leaFixup32.mir @@ -104,7 +104,7 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $eax = ADD32rr $eax, killed $ebp + ; CHECK: $eax = ADD32rr $eax, $ebp ; CHECK: $eax = ADD32ri8 $eax, -5 $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg @@ -139,7 +139,7 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: 
$ebp = ADD32rr $ebp, killed $eax + ; CHECK: $ebp = ADD32rr $ebp, $eax ; CHECK: $ebp = ADD32ri8 $ebp, -5 $ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg @@ -315,7 +315,7 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $eax = ADD32rr $eax, killed $ebp + ; CHECK: $eax = ADD32rr $eax, $ebp ; CHECK: $eax = ADD32ri $eax, 129 $eax = LEA32r killed $eax, 1, killed $ebp, 129, $noreg diff --git a/llvm/test/CodeGen/X86/leaFixup64.mir b/llvm/test/CodeGen/X86/leaFixup64.mir index 317c219992c74..4e9c47b11fc49 100644 --- a/llvm/test/CodeGen/X86/leaFixup64.mir +++ b/llvm/test/CodeGen/X86/leaFixup64.mir @@ -177,8 +177,8 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $eax = LEA64_32r killed $rax, 1, killed $rbp, 0 - ; CHECK: $eax = ADD32ri8 $eax, -5 + ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp + ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags $eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg RETQ $eax @@ -212,8 +212,8 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $ebp = LEA64_32r killed $rax, 1, killed $rbp, 0 - ; CHECK: $ebp = ADD32ri8 $ebp, -5 + ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax + ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags $ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg RETQ $ebp @@ -281,7 +281,7 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rax = ADD64rr $rax, killed $rbp + ; CHECK: $rax = ADD64rr $rax, $rbp ; CHECK: $rax = ADD64ri8 $rax, -5 $rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg @@ -316,7 +316,7 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rbp = ADD64rr $rbp, killed $rax + ; CHECK: $rbp = ADD64rr $rbp, $rax ; CHECK: $rbp = ADD64ri8 $rbp, -5 $rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg @@ -635,8 +635,8 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $eax = 
LEA64_32r killed $rax, 1, killed $rbp, 0 - ; CHECK: $eax = ADD32ri $eax, 129 + ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags + ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags $eax = LEA64_32r killed $rax, 1, killed $rbp, 129, $noreg RETQ $eax @@ -772,8 +772,8 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rax = ADD64rr $rax, killed $rbp - ; CHECK: $rax = ADD64ri32 $rax, 129 + ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags + ; CHECK: $rax = ADD64ri32 $rax, 129, implicit-def $eflags $rax = LEA64r killed $rax, 1, killed $rbp, 129, $noreg RETQ $eax diff --git a/llvm/test/CodeGen/X86/select-1-or-neg1.ll b/llvm/test/CodeGen/X86/select-1-or-neg1.ll index b0244fe7d992c..c85cc08f886b3 100644 --- a/llvm/test/CodeGen/X86/select-1-or-neg1.ll +++ b/llvm/test/CodeGen/X86/select-1-or-neg1.ll @@ -19,8 +19,8 @@ define i32 @PR28968(i32 %x) { ; SLOWLEA3-NEXT: xorl %eax, %eax ; SLOWLEA3-NEXT: cmpl $1, %edi ; SLOWLEA3-NEXT: sete %al -; SLOWLEA3-NEXT: leal (%rax,%rax), %eax -; SLOWLEA3-NEXT: addl $-1, %eax +; SLOWLEA3-NEXT: addl %eax, %eax +; SLOWLEA3-NEXT: decl %eax ; SLOWLEA3-NEXT: retq %cmp = icmp eq i32 %x, 1 %sel = select i1 %cmp, i32 1, i32 -1 From 6785108356324702a1f1d24a362a538e30d7b9aa Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Oct 2019 06:50:56 +0000 Subject: [PATCH 084/254] [X86] Autogenerate checks in leaFixup32.mir and leaFixup64.mir. 
NFC llvm-svn: 373878 --- llvm/test/CodeGen/X86/leaFixup32.mir | 157 +++++++++----- llvm/test/CodeGen/X86/leaFixup64.mir | 293 ++++++++++++++++++--------- 2 files changed, 294 insertions(+), 156 deletions(-) diff --git a/llvm/test/CodeGen/X86/leaFixup32.mir b/llvm/test/CodeGen/X86/leaFixup32.mir index ede0df7c77d8e..23f3d76169668 100644 --- a/llvm/test/CodeGen/X86/leaFixup32.mir +++ b/llvm/test/CodeGen/X86/leaFixup32.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -run-pass x86-fixup-LEAs -mtriple=i386 -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s --- | ; ModuleID = 'test/CodeGen/X86/fixup-lea.ll' @@ -40,13 +41,13 @@ define i32 @test1lea_ebp_32() { ret i32 0 } - + ;test2addi32_32: 3 operands LEA32r that can be replaced with 2 add instructions where ADD32ri32 ; is chosen define i32 @test2addi32_32() { ret i32 0 } - + ;test1mov1add_ebp_32: 2 operands LEA32r that can be replaced with 1 add 1 mov instructions ; where the base is rbp/r13/ebp register define i32 @test1mov1add_ebp_32() { @@ -64,7 +65,7 @@ define i32 @testleaadd_ebp_index2_32() { ret i32 0 } - + ;test_skip_opt_32: 3 operands LEA32r that can not be replaced with 2 instructions define i32 @test_skip_opt_32() { ret i32 0 @@ -84,10 +85,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -104,9 +105,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $eax = ADD32rr $eax, $ebp - ; CHECK: $eax = ADD32ri8 $eax, -5 - + + ; CHECK-LABEL: name: test2add_32 + ; CHECK: liveins: $eax, $ebp + ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags + ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags + ; CHECK: RETQ $eax $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg RETQ $eax @@ -119,10 +123,10 @@ legalized: false 
regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -139,9 +143,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $ebp = ADD32rr $ebp, $eax - ; CHECK: $ebp = ADD32ri8 $ebp, -5 - + + ; CHECK-LABEL: name: test2add_ebp_32 + ; CHECK: liveins: $eax, $ebp + ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags + ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags + ; CHECK: RETQ $ebp $ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg RETQ $ebp @@ -154,10 +161,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -174,8 +181,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $ebp = ADD32rr $ebp, $eax - + + ; CHECK-LABEL: name: test1add_ebp_32 + ; CHECK: liveins: $eax, $ebp + ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags + ; CHECK: RETQ $ebp $ebp = LEA32r killed $ebp, 1, killed $eax, 0, $noreg RETQ $ebp @@ -188,11 +198,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } - { reg: '$ebx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -209,9 +219,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $esi - ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0 - ; CHECK: $ebx = ADD32ri8 $ebx, -5 - + + ; CHECK-LABEL: name: testleaadd_32 + ; CHECK: liveins: $eax, $ebp, $esi + ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg + ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA32r killed $eax, 1, killed $ebp, -5, $noreg RETQ $ebx @@ -224,11 +237,11 @@ 
legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } - { reg: '$ebx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -245,9 +258,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp + + ; CHECK-LABEL: name: testleaadd_ebp_32 + ; CHECK: liveins: $eax, $ebp ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5 - + ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA32r killed $ebp, 1, killed $eax, -5, $noreg RETQ $ebx @@ -260,11 +276,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } - { reg: '$ebx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -281,8 +297,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp + + ; CHECK-LABEL: name: test1lea_ebp_32 + ; CHECK: liveins: $eax, $ebp ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg - + ; CHECK: RETQ $ebx $ebx = LEA32r killed $ebp, 1, killed $eax, 0, $noreg RETQ $ebx @@ -295,10 +314,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -315,9 +334,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp - ; CHECK: $eax = ADD32rr $eax, $ebp - ; CHECK: $eax = ADD32ri $eax, 129 - + + ; CHECK-LABEL: name: test2addi32_32 + ; CHECK: liveins: $eax, $ebp + ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags + ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags + ; CHECK: RETQ $eax $eax = LEA32r killed $eax, 1, killed $ebp, 129, $noreg RETQ $eax @@ -330,11 +352,11 @@ legalized: false regBankSelected: false selected: false 
tracksRegLiveness: true -liveins: +liveins: - { reg: '$eax' } - { reg: '$ebx' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -351,9 +373,13 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx - ; CHECK: $ebx = MOV32rr $ebp - ; CHECK: $ebx = ADD32rr $ebx, $ebp - + + ; CHECK-LABEL: name: test1mov1add_ebp_32 + ; CHECK: liveins: $eax, $ebp, $ebx + ; CHECK: $ebx = MOV32rr $ebp + ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags + ; CHECK: $ebx = LEA32r killed $ebp, 1, $ebp, 0, $noreg + ; CHECK: RETQ $ebx $ebx = LEA32r killed $ebp, 1, $ebp, 0, $noreg RETQ $ebx @@ -366,10 +392,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$ebx' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -386,9 +412,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx + + ; CHECK-LABEL: name: testleaadd_ebp_index_32 + ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = LEA32r $noreg, 1, $ebp, 5, $noreg - ; CHECK: $ebx = ADD32rr $ebx, $ebp - + ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA32r $ebp, 1, $ebp, 5, $noreg RETQ $ebx @@ -401,10 +430,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$ebx' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -421,9 +450,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx + + ; CHECK-LABEL: name: testleaadd_ebp_index2_32 + ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = LEA32r $noreg, 4, $ebp, 5, $noreg - ; CHECK: $ebx = ADD32rr $ebx, $ebp - + ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA32r $ebp, 4, $ebp, 5, $noreg RETQ $ebx @@ -436,10 +468,10 @@ legalized: false 
regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$ebx' } - { reg: '$ebp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -456,8 +488,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx + + ; CHECK-LABEL: name: test_skip_opt_32 + ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebp = LEA32r killed $ebp, 4, killed $ebp, 0, $noreg - + ; CHECK: RETQ $ebp $ebp = LEA32r killed $ebp, 4, killed $ebp, 0, $noreg RETQ $ebp @@ -470,10 +505,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$ebp' } - { reg: '$eax' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -488,12 +523,22 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false body: | + ; CHECK-LABEL: name: test_skip_eflags_32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $eax, $ebp, $ebx + ; CHECK: CMP32rr $eax, killed $ebx, implicit-def $eflags + ; CHECK: $ebx = LEA32r killed $eax, 4, killed $eax, 5, $noreg + ; CHECK: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK: RETQ $ebx + ; CHECK: bb.1: + ; CHECK: liveins: $eax, $ebp, $ebx + ; CHECK: $ebp = LEA32r killed $ebx, 4, killed $ebx, 0, $noreg + ; CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags + ; CHECK: RETQ $ebp bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx - ; CHECK: $ebx = LEA32r killed $eax, 4, killed $eax, 5, $noreg - ; CHECK: $ebp = LEA32r killed $ebx, 4, killed $ebx, 0, $noreg - ; CHECK: $ebp = ADD32ri8 $ebp, 5 - + CMP32rr $eax, killed $ebx, implicit-def $eflags $ebx = LEA32r killed $eax, 4, killed $eax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags diff --git a/llvm/test/CodeGen/X86/leaFixup64.mir b/llvm/test/CodeGen/X86/leaFixup64.mir index 4e9c47b11fc49..77be582225694 100644 --- a/llvm/test/CodeGen/X86/leaFixup64.mir +++ b/llvm/test/CodeGen/X86/leaFixup64.mir @@ -1,3 +1,4 @@ +# 
NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -run-pass x86-fixup-LEAs -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s --- | ; ModuleID = 'lea-2.ll' @@ -78,13 +79,13 @@ define i32 @test8() { ret i32 0 } - + ;testleaaddi32_64_32: 3 operands LEA64_32r that can be replaced with 1 lea + 1 add instructions where ; ADD64ri32 is chosen define i32 @testleaaddi32_64_32() { ret i32 0 } - + ;test1mov1add_rbp_64_32: 2 operands LEA64_32r cannot be replaced with 1 add 1 mov instructions ; where the base is rbp/r13/ebp register define i32 @test1mov1add_rbp_64_32() { @@ -102,13 +103,13 @@ define i32 @testleaadd_rbp_index2_64_32() { ret i32 0 } - + ;test2addi32_64: 3 operands LEA64r that can be replaced with 2 add instructions where ADD64ri32 ; is chosen define i32 @test2addi32_64() { ret i32 0 } - + ;test1mov1add_rbp_64: 2 operands LEA64r that can be replaced with 1 add 1 mov instructions ; where the base is rbp/r13/ebp register define i32 @test1mov1add_rbp_64() { @@ -157,10 +158,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -177,9 +178,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_64_32_1 + ; CHECK: liveins: $rax, $rbp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags - + ; CHECK: RETQ $eax $eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg RETQ $eax @@ -192,10 +196,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -212,9 +216,12 @@ frameInfo: body: | bb.0 
(%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_rbp_64_32_1 + ; CHECK: liveins: $rax, $rbp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags - + ; CHECK: RETQ $ebp $ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg RETQ $ebp @@ -227,10 +234,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -247,8 +254,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: test1lea_rbp_64_32_1 + ; CHECK: liveins: $rax, $rbp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax - + ; CHECK: RETQ $ebp $ebp = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg RETQ $ebp @@ -261,10 +271,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -281,9 +291,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rax = ADD64rr $rax, $rbp - ; CHECK: $rax = ADD64ri8 $rax, -5 - + + ; CHECK-LABEL: name: test2add_64 + ; CHECK: liveins: $rax, $rbp + ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags + ; CHECK: $rax = ADD64ri8 $rax, -5, implicit-def $eflags + ; CHECK: RETQ $eax $rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg RETQ $eax @@ -296,10 +309,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -316,9 +329,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rbp = ADD64rr $rbp, $rax - ; 
CHECK: $rbp = ADD64ri8 $rbp, -5 - + + ; CHECK-LABEL: name: test2add_rbp_64 + ; CHECK: liveins: $rax, $rbp + ; CHECK: $rbp = ADD64rr $rbp, $rax, implicit-def $eflags + ; CHECK: $rbp = ADD64ri8 $rbp, -5, implicit-def $eflags + ; CHECK: RETQ $ebp $rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg RETQ $ebp @@ -331,10 +347,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -351,8 +367,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $rbp = ADD64rr $rbp, $rax - + + ; CHECK-LABEL: name: test1add_rbp_64 + ; CHECK: liveins: $rax, $rbp + ; CHECK: $rbp = ADD64rr $rbp, $rax, implicit-def $eflags + ; CHECK: RETQ $ebp $rbp = LEA64r killed $rbp, 1, killed $rax, 0, $noreg RETQ $ebp @@ -365,11 +384,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -386,9 +405,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_64_32 + ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5 - + ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg RETQ $ebx @@ -401,11 +423,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -422,9 +444,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_rbp_64_32 + ; CHECK: liveins: $rax, 
$rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $ebx = ADD32ri8 $ebx, -5 - + ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg RETQ $ebx @@ -437,11 +462,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -458,8 +483,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: test1lea_rbp_64_32 + ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg RETQ $ebx @@ -472,11 +500,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -493,9 +521,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_64 + ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: $rbx = ADD64ri8 $rbx, -5 - + ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $rbx = LEA64r killed $rax, 1, killed $rbp, -5, $noreg RETQ $ebx @@ -508,11 +539,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -529,9 +560,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: testleaadd_rbp_64 + ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - ; 
CHECK: $rbx = ADD64ri8 $rbx, -5 - + ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags + ; CHECK: RETQ $ebx $rbx = LEA64r killed $rbp, 1, killed $rax, -5, $noreg RETQ $ebx @@ -544,11 +578,11 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } - { reg: '$rbx' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -565,8 +599,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: test1lea_rbp_64 + ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - + ; CHECK: RETQ $ebx $rbx = LEA64r killed $rbp, 1, killed $rax, 0, $noreg RETQ $ebx @@ -579,10 +616,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rdi' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -599,8 +636,13 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rdi, $rbp - ; CHECK: $r12 = LEA64r $noreg, 2, killed $r13, 5, $noreg - ; CHECK: $r12 = ADD64rr $r12, killed $rbp + ; CHECK-LABEL: name: test8 + ; CHECK: liveins: $rdi, $rbp + ; CHECK: $rbp = KILL $rbp, implicit-def $rbp + ; CHECK: $r13 = KILL $rdi, implicit-def $r13 + ; CHECK: $r12 = LEA64r $noreg, 2, killed $r13, 5, $noreg + ; CHECK: $r12 = ADD64rr $r12, killed $rbp, implicit-def $eflags + ; CHECK: RETQ $r12 $rbp = KILL $rbp, implicit-def $rbp $r13 = KILL $rdi, implicit-def $r13 $r12 = LEA64r killed $rbp, 2, killed $r13, 5, $noreg @@ -615,10 +657,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -635,9 +677,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp - ; CHECK: $eax = ADD32rr $eax, $ebp, 
implicit-def $eflags + + ; CHECK-LABEL: name: testleaaddi32_64_32 + ; CHECK: liveins: $rax, $rbp + ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags - + ; CHECK: RETQ $eax $eax = LEA64_32r killed $rax, 1, killed $rbp, 129, $noreg RETQ $eax @@ -650,10 +695,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -670,8 +715,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx - ; CHECK: $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 0, $noreg + ; CHECK-LABEL: name: test1mov1add_rbp_64_32 + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 0, $noreg + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 0, $noreg RETQ $ebx @@ -684,10 +732,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -704,8 +752,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx + + ; CHECK-LABEL: name: testleaadd_rbp_index_64_32 + ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 5, $noreg - + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 5, $noreg RETQ $ebx @@ -718,10 +769,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -738,8 +789,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx + + ; CHECK-LABEL: name: testleaadd_rbp_index2_64_32 + ; CHECK: liveins: $eax, $ebp, $ebx ; 
CHECK: $ebx = LEA64_32r killed $rbp, 4, killed $rbp, 5, $noreg - + ; CHECK: RETQ $ebx $ebx = LEA64_32r killed $rbp, 4, killed $rbp, 5, $noreg RETQ $ebx @@ -752,10 +806,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -772,9 +826,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp + + ; CHECK-LABEL: name: test2addi32_64 + ; CHECK: liveins: $rax, $rbp ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags ; CHECK: $rax = ADD64ri32 $rax, 129, implicit-def $eflags - + ; CHECK: RETQ $eax $rax = LEA64r killed $rax, 1, killed $rbp, 129, $noreg RETQ $eax @@ -787,10 +844,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rax' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -807,9 +864,13 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx + + ; CHECK-LABEL: name: test1mov1add_rbp_64 + ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbx = MOV64rr $rbp - ; CHECK: $rbx = ADD64rr $rbx, $rbp - + ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags + ; CHECK: $rbx = LEA64r killed $rbp, 1, $rbp, 0, $noreg + ; CHECK: RETQ $ebx $rbx = LEA64r killed $rbp, 1, $rbp, 0, $noreg RETQ $ebx @@ -822,10 +883,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -842,9 +903,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx - ; CHECK: $rbx = LEA64r $noreg, 1, $rbp, 5, $noreg - ; CHECK: $rbx = ADD64rr $rbx, $rbp - + + ; CHECK-LABEL: name: testleaadd_rbp_index_64 + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: $rbx = LEA64r $noreg, 
1, $rbp, 5, $noreg + ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags + ; CHECK: RETQ $ebx $rbx = LEA64r $rbp, 1, $rbp, 5, $noreg RETQ $ebx @@ -857,10 +921,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -877,9 +941,12 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx + + ; CHECK-LABEL: name: testleaadd_rbp_index2_64 + ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbx = LEA64r $noreg, 4, $rbp, 5, $noreg - ; CHECK: $rbx = ADD64rr $rbx, $rbp - + ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags + ; CHECK: RETQ $ebx $rbx = LEA64r $rbp, 4, $rbp, 5, $noreg RETQ $ebx @@ -892,10 +959,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -912,8 +979,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx + + ; CHECK-LABEL: name: test_skip_opt_64 + ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbp = LEA64r killed $rbp, 4, killed $rbp, 0, $noreg - + ; CHECK: RETQ $ebp $rbp = LEA64r killed $rbp, 4, killed $rbp, 0, $noreg RETQ $ebp @@ -926,10 +996,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbp' } - { reg: '$rax' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -944,12 +1014,22 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false body: | + ; CHECK-LABEL: name: test_skip_eflags_64 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: CMP64rr $rax, killed $rbx, implicit-def $eflags + ; CHECK: $rbx = LEA64r killed $rax, 4, killed $rax, 5, $noreg + ; CHECK: 
JCC_1 %bb.1, 4, implicit $eflags + ; CHECK: RETQ $ebx + ; CHECK: bb.1: + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: $rbp = LEA64r killed $rbx, 4, killed $rbx, 0, $noreg + ; CHECK: $rbp = ADD64ri8 $rbp, 5, implicit-def $eflags + ; CHECK: RETQ $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx - ; CHECK: $rbx = LEA64r killed $rax, 4, killed $rax, 5, $noreg - ; CHECK: $rbp = LEA64r killed $rbx, 4, killed $rbx, 0, $noreg - ; CHECK: $rbp = ADD64ri8 $rbp, 5 - + CMP64rr $rax, killed $rbx, implicit-def $eflags $rbx = LEA64r killed $rax, 4, killed $rax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags @@ -968,10 +1048,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbx' } - { reg: '$rbp' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -988,8 +1068,11 @@ frameInfo: body: | bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx + + ; CHECK-LABEL: name: test_skip_opt_64_32 + ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebp = LEA64_32r killed $rbp, 4, killed $rbp, 0, $noreg - + ; CHECK: RETQ $ebp $ebp = LEA64_32r killed $rbp, 4, killed $rbp, 0, $noreg RETQ $ebp @@ -1002,10 +1085,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$rbp' } - { reg: '$rax' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1020,12 +1103,22 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false body: | + ; CHECK-LABEL: name: test_skip_eflags_64_32 + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: CMP64rr $rax, killed $rbx, implicit-def $eflags + ; CHECK: $ebx = LEA64_32r killed $rax, 4, killed $rax, 5, $noreg + ; CHECK: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK: RETQ $ebx + ; CHECK: bb.1: + ; CHECK: liveins: $rax, $rbp, $rbx + ; CHECK: $ebp = LEA64_32r killed $rbx, 4, killed $rbx, 0, $noreg + ; 
CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags + ; CHECK: RETQ $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx - ; CHECK: $ebx = LEA64_32r killed $rax, 4, killed $rax, 5, $noreg - ; CHECK: $ebp = LEA64_32r killed $rbx, 4, killed $rbx, 0, $noreg - ; CHECK: $ebp = ADD32ri8 $ebp, 5 - + CMP64rr $rax, killed $rbx, implicit-def $eflags $ebx = LEA64_32r killed $rax, 4, killed $rax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags From 0c56f425a0d2bee766b8627a40af3ad030757e16 Mon Sep 17 00:00:00 2001 From: Djordje Todorovic Date: Mon, 7 Oct 2019 07:31:49 +0000 Subject: [PATCH 085/254] [llvm-locstats] Fix a typo in the documentation; NFC llvm-svn: 373880 --- llvm/docs/CommandGuide/llvm-locstats.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/CommandGuide/llvm-locstats.rst b/llvm/docs/CommandGuide/llvm-locstats.rst index 434b8c8ed2af6..8b99917a1ff4a 100644 --- a/llvm/docs/CommandGuide/llvm-locstats.rst +++ b/llvm/docs/CommandGuide/llvm-locstats.rst @@ -60,7 +60,7 @@ OUTPUT EXAMPLE 20-29% 0 0% 30-39% 0 0% 40-49% 0 0% - 50-99% 1 16% + 50-59% 1 16% 60-69% 0 0% 70-79% 0 0% 80-89% 1 16% From dfc1aee25b68c9819b4a8a868be784110c6e751e Mon Sep 17 00:00:00 2001 From: Martin Storsjo Date: Mon, 7 Oct 2019 08:21:37 +0000 Subject: [PATCH 086/254] Revert "[SLP] avoid reduction transform on patterns that the backend can load-combine" This reverts SVN r373833, as it caused a failed assert "Non-zero loop cost expected" on building numerous projects, see PR43582 for details and reproduction samples. 
llvm-svn: 373882 --- .../llvm/Analysis/TargetTransformInfo.h | 10 -- llvm/lib/Analysis/TargetTransformInfo.cpp | 53 ------ .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +- .../SLPVectorizer/X86/bad-reduction.ll | 156 ++++++------------ 4 files changed, 55 insertions(+), 179 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 67e62f6f29bcb..6da2d7f43bc42 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1129,16 +1129,6 @@ class TargetTransformInfo { /// Returns -1 if the cost is unknown. int getInstructionThroughput(const Instruction *I) const; - /// Given an input value that is an element of an 'or' reduction, check if the - /// reduction is composed of narrower loaded values. Assuming that a - /// legal-sized reduction of shifted/zexted loaded values can be load combined - /// in the backend, create a relative cost that accounts for the removal of - /// the intermediate ops and replacement by a single wide load. - /// TODO: If load combining is allowed in the IR optimizer, this analysis - /// may not be necessary. - Optional getLoadCombineCost(unsigned Opcode, - ArrayRef Args) const; - /// The abstract base class used to type erase specific TTI /// implementations. class Concept; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 6730aa86a99a1..f3d20ce984dbd 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -571,64 +571,11 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { return OpInfo; } -Optional -TargetTransformInfo::getLoadCombineCost(unsigned Opcode, - ArrayRef Args) const { - if (Opcode != Instruction::Or) - return llvm::None; - if (Args.empty()) - return llvm::None; - - // Look past the reduction to find a source value. 
Arbitrarily follow the - // path through operand 0 of any 'or'. Also, peek through optional - // shift-left-by-constant. - const Value *ZextLoad = Args.front(); - while (match(ZextLoad, m_Or(m_Value(), m_Value())) || - match(ZextLoad, m_Shl(m_Value(), m_Constant()))) - ZextLoad = cast(ZextLoad)->getOperand(0); - - // Check if the input to the reduction is an extended load. - Value *LoadPtr; - if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) - return llvm::None; - - // Require that the total load bit width is a legal integer type. - // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target. - // But <16 x i8> --> i128 is not, so the backend probably can't reduce it. - Type *WideType = ZextLoad->getType(); - Type *EltType = LoadPtr->getType()->getPointerElementType(); - unsigned WideWidth = WideType->getIntegerBitWidth(); - unsigned EltWidth = EltType->getIntegerBitWidth(); - if (!isTypeLegal(WideType) || WideWidth % EltWidth != 0) - return llvm::None; - - // Calculate relative cost: {narrow load+zext+shl+or} are assumed to be - // removed and replaced by a single wide load. - // FIXME: This is not accurate for the larger pattern where we replace - // multiple narrow load sequences with just 1 wide load. We could - // remove the addition of the wide load cost here and expect the caller - // to make an adjustment for that. 
- int Cost = 0; - Cost -= getMemoryOpCost(Instruction::Load, EltType, 0, 0); - Cost -= getCastInstrCost(Instruction::ZExt, WideType, EltType); - Cost -= getArithmeticInstrCost(Instruction::Shl, WideType); - Cost -= getArithmeticInstrCost(Instruction::Or, WideType); - Cost += getMemoryOpCost(Instruction::Load, WideType, 0, 0); - return Cost; -} - - int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args) const { - // Check if we can match this instruction as part of a larger pattern. - Optional LoadCombineCost = getLoadCombineCost(Opcode, Args); - if (LoadCombineCost) - return LoadCombineCost.getValue(); - - // Fallback to implementation-specific overrides or base class. int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ad12646bdeee6..99428c6c5dee3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6499,19 +6499,10 @@ class HorizontalReduction { int ScalarReduxCost = 0; switch (ReductionData.getKind()) { - case RK_Arithmetic: { - // Note: Passing in the reduction operands allows the cost model to match - // load combining patterns for this reduction. 
- auto *ReduxInst = cast(ReductionRoot); - SmallVector OperandList; - for (Value *Operand : ReduxInst->operands()) - OperandList.push_back(Operand); - ScalarReduxCost = TTI->getArithmeticInstrCost(ReductionData.getOpcode(), - ScalarTy, TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None, OperandList); + case RK_Arithmetic: + ScalarReduxCost = + TTI->getArithmeticInstrCost(ReductionData.getOpcode(), ScalarTy); break; - } case RK_Min: case RK_Max: case RK_UMin: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index c44a8524edfe5..e3452e194dbfb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -15,37 +15,31 @@ define i64 @load_bswap(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] -; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] -; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] -; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <4 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] ; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] ; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] ; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] -; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 -; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 -; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 -; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64 ; CHECK-NEXT: 
[[Z5:%.*]] = zext i8 [[T5]] to i64 ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64 ; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64 -; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56 -; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48 -; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40 -; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <4 x i64> [[TMP3]], ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 -; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]] -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]] -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]] -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]] -; CHECK-NEXT: ret i64 [[OR01234567]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[SH4]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]] +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]] +; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 @@ -103,38 +97,18 @@ define i64 @load_bswap_nop_shift(%v8i8* %p) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 ; CHECK-NEXT: 
[[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 -; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[G0]] -; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[G1]] -; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[G2]] -; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[G3]] -; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] -; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[G5]] -; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] -; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] -; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[T0]] to i64 -; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[T1]] to i64 -; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[T2]] to i64 -; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[T3]] to i64 -; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64 -; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64 -; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64 -; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64 -; CHECK-NEXT: [[SH0:%.*]] = shl nuw i64 [[Z0]], 56 -; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw i64 [[Z1]], 48 -; CHECK-NEXT: [[SH2:%.*]] = shl nuw nsw i64 [[Z2]], 40 -; CHECK-NEXT: [[SH3:%.*]] = shl nuw nsw i64 [[Z3]], 32 -; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 -; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 -; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 -; CHECK-NEXT: [[SH7:%.*]] = shl nuw nsw i64 [[Z7]], 0 -; CHECK-NEXT: [[OR01:%.*]] = or i64 [[SH0]], [[SH1]] -; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], [[SH2]] -; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], [[SH3]] -; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] -; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] -; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] -; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[SH7]] -; CHECK-NEXT: ret i64 [[OR01234567]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <8 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: 
[[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: ret i64 [[TMP5]] ; %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 @@ -194,36 +168,30 @@ define i64 @load64le(i8* %arg) { ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 ; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1 -; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1 -; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1 -; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1 -; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G1]] to <4 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1 ; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1 ; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1 ; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64 -; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64 -; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64 -; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64 -; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> ; CHECK-NEXT: [[Z5:%.*]] = zext 
i8 [[LD5]] to i64 ; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64 ; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64 -; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8 -; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16 -; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24 -; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw <4 x i64> [[TMP3]], ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 -; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[Z0]] -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]] -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]] -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]] -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] -; CHECK-NEXT: ret i64 [[O7]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[S5]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]] +; CHECK-NEXT: ret i64 [[OP_EXTRA]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 %g2 = getelementptr inbounds i8, i8* %arg, i64 2 @@ -279,38 +247,18 @@ define i64 @load64le_nop_shift(i8* %arg) { ; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 5 ; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 ; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 -; 
CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1 -; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* [[G1]], align 1 -; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* [[G2]], align 1 -; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* [[G3]], align 1 -; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* [[G4]], align 1 -; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1 -; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1 -; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1 -; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64 -; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[LD1]] to i64 -; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[LD2]] to i64 -; CHECK-NEXT: [[Z3:%.*]] = zext i8 [[LD3]] to i64 -; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[LD4]] to i64 -; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64 -; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64 -; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64 -; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 0 -; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8 -; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16 -; CHECK-NEXT: [[S3:%.*]] = shl nuw nsw i64 [[Z3]], 24 -; CHECK-NEXT: [[S4:%.*]] = shl nuw nsw i64 [[Z4]], 32 -; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 -; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 -; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 -; CHECK-NEXT: [[O1:%.*]] = or i64 [[S1]], [[S0]] -; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], [[S2]] -; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], [[S3]] -; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], [[S4]] -; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] -; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] -; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] -; CHECK-NEXT: ret i64 [[O7]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG]] to <8 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x 
i64> [[TMP4]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: ret i64 [[TMP5]] ; %g1 = getelementptr inbounds i8, i8* %arg, i64 1 %g2 = getelementptr inbounds i8, i8* %arg, i64 2 From beb696e2a68c4715fb21f282f83be8590979d112 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Mon, 7 Oct 2019 08:23:20 +0000 Subject: [PATCH 087/254] [TableGen] Pacify gcc-5.4 more Followup to a previous pacification, this performs the same workaround to the TableGen generated code for tuple automata. llvm-svn: 373883 --- llvm/utils/TableGen/DFAEmitter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index cf339dc73d72c..dd3db7c150ba6 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -373,7 +373,7 @@ void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { const ActionTuple &AT = Actions[A]; if (AT.size() > 1) - OS << "{"; + OS << "std::make_tuple("; bool First = true; for (const auto &SingleAction : AT) { if (!First) @@ -382,7 +382,7 @@ void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { SingleAction.print(OS); } if (AT.size() > 1) - OS << "}"; + OS << ")"; } namespace llvm { From 5761e3cef42fb188453253f9e0793542ec7a2eae Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Oct 2019 08:30:46 +0000 Subject: [PATCH 088/254] [ELF][MIPS] Use 
lld::elf::{read,write}* instead of llvm::support::endian::{read,write}* This allows us to delete `using namespace llvm::support::endian` and simplify D68323. This change adds runtime config->endianness check but the overhead should be negligible. Reviewed By: ruiu Differential Revision: https://reviews.llvm.org/D68561 llvm-svn: 373884 --- lld/ELF/Arch/Mips.cpp | 138 +++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 70 deletions(-) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index 3674e1cfad644..b9c66cdff5d0f 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -14,11 +14,9 @@ #include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Object/ELF.h" -#include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; -using namespace llvm::support::endian; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; @@ -199,7 +197,7 @@ void MIPS::writeGotPlt(uint8_t *buf, const Symbol &) const { uint64_t va = in.plt->getVA(); if (isMicroMips()) va |= 1; - write32(buf, va); + write32(buf, va); } template static uint32_t readShuffle(const uint8_t *loc) { @@ -209,7 +207,7 @@ template static uint32_t readShuffle(const uint8_t *loc) { // as early as possible. To do so, little-endian binaries keep 16-bit // words in a big-endian order. That is why we have to swap these // words to get a correct value. 
- uint32_t v = read32(loc); + uint32_t v = read32(loc); if (E == support::little) return (v << 16) | (v >> 16); return v; @@ -218,10 +216,10 @@ template static uint32_t readShuffle(const uint8_t *loc) { template static void writeValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, uint8_t shift) { - uint32_t instr = read32(loc); + uint32_t instr = read32(loc); uint32_t mask = 0xffffffff >> (32 - bitsSize); uint32_t data = (instr & ~mask) | ((v >> shift) & mask); - write32(loc, data); + write32(loc, data); } template @@ -241,10 +239,10 @@ static void writeShuffleValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, template static void writeMicroRelocation16(uint8_t *loc, uint64_t v, uint8_t bitsSize, uint8_t shift) { - uint16_t instr = read16(loc); + uint16_t instr = read16(loc); uint16_t mask = 0xffff >> (16 - bitsSize); uint16_t data = (instr & ~mask) | ((v >> shift) & mask); - write16(loc, data); + write16(loc, data); } template void MIPS::writePltHeader(uint8_t *buf) const { @@ -255,53 +253,53 @@ template void MIPS::writePltHeader(uint8_t *buf) const { // Overwrite trap instructions written by Writer::writeTrapInstr. memset(buf, 0, pltHeaderSize); - write16(buf, isMipsR6() ? 0x7860 : 0x7980); // addiupc v1, (GOTPLT) - . - write16(buf + 4, 0xff23); // lw $25, 0($3) - write16(buf + 8, 0x0535); // subu16 $2, $2, $3 - write16(buf + 10, 0x2525); // srl16 $2, $2, 2 - write16(buf + 12, 0x3302); // addiu $24, $2, -2 - write16(buf + 14, 0xfffe); - write16(buf + 16, 0x0dff); // move $15, $31 + write16(buf, isMipsR6() ? 0x7860 : 0x7980); // addiupc v1, (GOTPLT) - . 
+ write16(buf + 4, 0xff23); // lw $25, 0($3) + write16(buf + 8, 0x0535); // subu16 $2, $2, $3 + write16(buf + 10, 0x2525); // srl16 $2, $2, 2 + write16(buf + 12, 0x3302); // addiu $24, $2, -2 + write16(buf + 14, 0xfffe); + write16(buf + 16, 0x0dff); // move $15, $31 if (isMipsR6()) { - write16(buf + 18, 0x0f83); // move $28, $3 - write16(buf + 20, 0x472b); // jalrc $25 - write16(buf + 22, 0x0c00); // nop + write16(buf + 18, 0x0f83); // move $28, $3 + write16(buf + 20, 0x472b); // jalrc $25 + write16(buf + 22, 0x0c00); // nop relocateOne(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); } else { - write16(buf + 18, 0x45f9); // jalrc $25 - write16(buf + 20, 0x0f83); // move $28, $3 - write16(buf + 22, 0x0c00); // nop + write16(buf + 18, 0x45f9); // jalrc $25 + write16(buf + 20, 0x0f83); // move $28, $3 + write16(buf + 22, 0x0c00); // nop relocateOne(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); } return; } if (config->mipsN32Abi) { - write32(buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) - write32(buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14) - write32(buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) - write32(buf + 12, 0x030ec023); // subu $24, $24, $14 - write32(buf + 16, 0x03e07825); // move $15, $31 - write32(buf + 20, 0x0018c082); // srl $24, $24, 2 + write32(buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) + write32(buf + 4, 0x8dd90000); // lw $25, %lo(&GOTPLT[0])($14) + write32(buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) + write32(buf + 12, 0x030ec023); // subu $24, $24, $14 + write32(buf + 16, 0x03e07825); // move $15, $31 + write32(buf + 20, 0x0018c082); // srl $24, $24, 2 } else if (ELFT::Is64Bits) { - write32(buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) - write32(buf + 4, 0xddd90000); // ld $25, %lo(&GOTPLT[0])($14) - write32(buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) - write32(buf + 12, 0x030ec023); // subu $24, $24, $14 - write32(buf + 16, 0x03e07825); // move $15, $31 - write32(buf + 20, 0x0018c0c2); // srl $24, $24, 3 + 
write32(buf, 0x3c0e0000); // lui $14, %hi(&GOTPLT[0]) + write32(buf + 4, 0xddd90000); // ld $25, %lo(&GOTPLT[0])($14) + write32(buf + 8, 0x25ce0000); // addiu $14, $14, %lo(&GOTPLT[0]) + write32(buf + 12, 0x030ec023); // subu $24, $24, $14 + write32(buf + 16, 0x03e07825); // move $15, $31 + write32(buf + 20, 0x0018c0c2); // srl $24, $24, 3 } else { - write32(buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) - write32(buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) - write32(buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) - write32(buf + 12, 0x031cc023); // subu $24, $24, $28 - write32(buf + 16, 0x03e07825); // move $15, $31 - write32(buf + 20, 0x0018c082); // srl $24, $24, 2 + write32(buf, 0x3c1c0000); // lui $28, %hi(&GOTPLT[0]) + write32(buf + 4, 0x8f990000); // lw $25, %lo(&GOTPLT[0])($28) + write32(buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) + write32(buf + 12, 0x031cc023); // subu $24, $24, $28 + write32(buf + 16, 0x03e07825); // move $15, $31 + write32(buf + 20, 0x0018c082); // srl $24, $24, 2 } uint32_t jalrInst = config->zHazardplt ? 0x0320fc09 : 0x0320f809; - write32(buf + 24, jalrInst); // jalr.hb $25 or jalr $25 - write32(buf + 28, 0x2718fffe); // subu $24, $24, 2 + write32(buf + 24, jalrInst); // jalr.hb $25 or jalr $25 + write32(buf + 28, 0x2718fffe); // subu $24, $24, 2 uint64_t gotPlt = in.gotPlt->getVA(); writeValue(buf, gotPlt + 0x8000, 16, 16); @@ -319,16 +317,16 @@ void MIPS::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, memset(buf, 0, pltEntrySize); if (isMipsR6()) { - write16(buf, 0x7840); // addiupc $2, (GOTPLT) - . - write16(buf + 4, 0xff22); // lw $25, 0($2) - write16(buf + 8, 0x0f02); // move $24, $2 - write16(buf + 10, 0x4723); // jrc $25 / jr16 $25 + write16(buf, 0x7840); // addiupc $2, (GOTPLT) - . 
+ write16(buf + 4, 0xff22); // lw $25, 0($2) + write16(buf + 8, 0x0f02); // move $24, $2 + write16(buf + 10, 0x4723); // jrc $25 / jr16 $25 relocateOne(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); } else { - write16(buf, 0x7900); // addiupc $2, (GOTPLT) - . - write16(buf + 4, 0xff22); // lw $25, 0($2) - write16(buf + 8, 0x4599); // jrc $25 / jr16 $25 - write16(buf + 10, 0x0f02); // move $24, $2 + write16(buf, 0x7900); // addiupc $2, (GOTPLT) - . + write16(buf + 4, 0xff22); // lw $25, 0($2) + write16(buf + 8, 0x4599); // jrc $25 / jr16 $25 + write16(buf + 10, 0x0f02); // move $24, $2 relocateOne(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); } return; @@ -339,10 +337,10 @@ void MIPS::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, : (config->zHazardplt ? 0x03200408 : 0x03200008); uint32_t addInst = ELFT::Is64Bits ? 0x65f80000 : 0x25f80000; - write32(buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) - write32(buf + 4, loadInst); // l[wd] $25, %lo(.got.plt entry)($15) - write32(buf + 8, jrInst); // jr $25 / jr.hb $25 - write32(buf + 12, addInst); // [d]addiu $24, $15, %lo(.got.plt entry) + write32(buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) + write32(buf + 4, loadInst); // l[wd] $25, %lo(.got.plt entry)($15) + write32(buf + 8, jrInst); // jr $25 / jr.hb $25 + write32(buf + 12, addInst); // [d]addiu $24, $15, %lo(.got.plt entry) writeValue(buf, gotPltEntryAddr + 0x8000, 16, 16); writeValue(buf + 4, gotPltEntryAddr, 16, 0); writeValue(buf + 12, gotPltEntryAddr, 16, 0); @@ -379,16 +377,16 @@ int64_t MIPS::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_MIPS_GPREL32: case R_MIPS_TLS_DTPREL32: case R_MIPS_TLS_TPREL32: - return SignExtend64<32>(read32(buf)); + return SignExtend64<32>(read32(buf)); case R_MIPS_26: // FIXME (simon): If the relocation target symbol is not a PLT entry // we should use another expression for calculation: // ((A << 2) | (P & 0xf0000000)) >> 2 - return SignExtend64<28>(read32(buf) << 2); + return 
SignExtend64<28>(read32(buf) << 2); case R_MIPS_GOT16: case R_MIPS_HI16: case R_MIPS_PCHI16: - return SignExtend64<16>(read32(buf)) << 16; + return SignExtend64<16>(read32(buf)) << 16; case R_MIPS_GPREL16: case R_MIPS_LO16: case R_MIPS_PCLO16: @@ -396,7 +394,7 @@ int64_t MIPS::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_MIPS_TLS_DTPREL_LO16: case R_MIPS_TLS_TPREL_HI16: case R_MIPS_TLS_TPREL_LO16: - return SignExtend64<16>(read32(buf)); + return SignExtend64<16>(read32(buf)); case R_MICROMIPS_GOT16: case R_MICROMIPS_HI16: return SignExtend64<16>(readShuffle(buf)) << 16; @@ -410,21 +408,21 @@ int64_t MIPS::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_MICROMIPS_GPREL7_S2: return SignExtend64<9>(readShuffle(buf) << 2); case R_MIPS_PC16: - return SignExtend64<18>(read32(buf) << 2); + return SignExtend64<18>(read32(buf) << 2); case R_MIPS_PC19_S2: - return SignExtend64<21>(read32(buf) << 2); + return SignExtend64<21>(read32(buf) << 2); case R_MIPS_PC21_S2: - return SignExtend64<23>(read32(buf) << 2); + return SignExtend64<23>(read32(buf) << 2); case R_MIPS_PC26_S2: - return SignExtend64<28>(read32(buf) << 2); + return SignExtend64<28>(read32(buf) << 2); case R_MIPS_PC32: - return SignExtend64<32>(read32(buf)); + return SignExtend64<32>(read32(buf)); case R_MICROMIPS_26_S1: return SignExtend64<27>(readShuffle(buf) << 1); case R_MICROMIPS_PC7_S1: - return SignExtend64<8>(read16(buf) << 1); + return SignExtend64<8>(read16(buf) << 1); case R_MICROMIPS_PC10_S1: - return SignExtend64<11>(read16(buf) << 1); + return SignExtend64<11>(read16(buf) << 1); case R_MICROMIPS_PC16_S1: return SignExtend64<17>(readShuffle(buf) << 1); case R_MICROMIPS_PC18_S3: @@ -494,7 +492,7 @@ static uint64_t fixupCrossModeJump(uint8_t *loc, RelType type, uint64_t val) { switch (type) { case R_MIPS_26: { - uint32_t inst = read32(loc) >> 26; + uint32_t inst = read32(loc) >> 26; if (inst == 0x3 || inst == 0x1d) { // JAL or JALX writeValue(loc, 0x1d << 26, 32, 
0); return val; @@ -552,12 +550,12 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_GPREL32: case R_MIPS_TLS_DTPREL32: case R_MIPS_TLS_TPREL32: - write32(loc, val); + write32(loc, val); break; case R_MIPS_64: case R_MIPS_TLS_DTPREL64: case R_MIPS_TLS_TPREL64: - write64(loc, val); + write64(loc, val); break; case R_MIPS_26: writeValue(loc, val, 26, 2); @@ -643,12 +641,12 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // Replace jalr/jr instructions by bal/b if the target // offset fits into the 18-bit range. if (isInt<18>(val)) { - switch (read32(loc)) { + switch (read32(loc)) { case 0x0320f809: // jalr $25 => bal sym - write32(loc, 0x04110000 | ((val >> 2) & 0xffff)); + write32(loc, 0x04110000 | ((val >> 2) & 0xffff)); break; case 0x03200008: // jr $25 => b sym - write32(loc, 0x10000000 | ((val >> 2) & 0xffff)); + write32(loc, 0x10000000 | ((val >> 2) & 0xffff)); break; } } From bd8cfe65f5fee4ad573adc2172359c9552e8cdc0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Oct 2019 08:31:18 +0000 Subject: [PATCH 089/254] [ELF] Wrap things in `namespace lld { namespace elf {`, NFC This makes it clear `ELF/**/*.cpp` files define things in the `lld::elf` namespace and simplifies `elf::foo` to `foo`. 
Reviewed By: atanasyan, grimar, ruiu Differential Revision: https://reviews.llvm.org/D68323 llvm-svn: 373885 --- lld/ELF/Arch/AArch64.cpp | 12 ++- lld/ELF/Arch/AMDGPU.cpp | 10 +- lld/ELF/Arch/ARM.cpp | 10 +- lld/ELF/Arch/AVR.cpp | 10 +- lld/ELF/Arch/Hexagon.cpp | 10 +- lld/ELF/Arch/MSP430.cpp | 10 +- lld/ELF/Arch/Mips.cpp | 27 +++--- lld/ELF/Arch/MipsArchTree.cpp | 26 ++--- lld/ELF/Arch/PPC.cpp | 12 ++- lld/ELF/Arch/PPC64.cpp | 22 +++-- lld/ELF/Arch/RISCV.cpp | 10 +- lld/ELF/Arch/SPARCV9.cpp | 10 +- lld/ELF/Arch/X86.cpp | 10 +- lld/ELF/Arch/X86_64.cpp | 10 +- lld/ELF/CallGraphSort.cpp | 10 +- lld/ELF/DWARF.cpp | 15 +-- lld/ELF/Driver.cpp | 14 +-- lld/ELF/DriverUtils.cpp | 25 ++--- lld/ELF/EhFrame.cpp | 12 ++- lld/ELF/ICF.cpp | 17 ++-- lld/ELF/InputFiles.cpp | 64 +++++++------ lld/ELF/InputFiles.h | 6 +- lld/ELF/InputSection.cpp | 18 ++-- lld/ELF/LTO.cpp | 7 +- lld/ELF/LinkerScript.cpp | 9 +- lld/ELF/MapFile.cpp | 12 ++- lld/ELF/MarkLive.cpp | 21 ++-- lld/ELF/OutputSections.cpp | 16 ++-- lld/ELF/Relocations.cpp | 28 +++--- lld/ELF/ScriptLexer.cpp | 7 +- lld/ELF/ScriptParser.cpp | 19 ++-- lld/ELF/SymbolTable.cpp | 10 +- lld/ELF/Symbols.cpp | 35 +++---- lld/ELF/Symbols.h | 17 ++-- lld/ELF/SyntheticSections.cpp | 176 +++++++++++++++++----------------- lld/ELF/Target.cpp | 17 ++-- lld/ELF/Writer.cpp | 36 +++---- 37 files changed, 439 insertions(+), 341 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 40f0dd65fe761..5cf07029fa1d5 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -17,13 +17,14 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) 
-uint64_t elf::getAArch64Page(uint64_t expr) { +uint64_t getAArch64Page(uint64_t expr) { return expr & ~static_cast(0xFFF); } @@ -679,4 +680,7 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); } +TargetInfo *getAArch64TargetInfo() { return getTargetInfo(); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index f2e32ca0996d5..b42ca77467427 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -17,8 +17,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class AMDGPU final : public TargetInfo { @@ -107,7 +108,10 @@ RelType AMDGPU::getDynRel(RelType type) const { return R_AMDGPU_NONE; } -TargetInfo *elf::getAMDGPUTargetInfo() { +TargetInfo *getAMDGPUTargetInfo() { static AMDGPU target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 64adc33c07ae2..41baea496d369 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -18,8 +18,9 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class ARM final : public TargetInfo { @@ -600,7 +601,10 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { } } -TargetInfo *elf::getARMTargetInfo() { +TargetInfo *getARMTargetInfo() { static ARM target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index 869f0fe0c5257..cb33ff448ba46 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -36,8 +36,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace 
lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class AVR final : public TargetInfo { @@ -70,7 +71,10 @@ void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *elf::getAVRTargetInfo() { +TargetInfo *getAVRTargetInfo() { static AVR target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 355ba9de4b97e..033500337ec2f 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -19,8 +19,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class Hexagon final : public TargetInfo { @@ -318,7 +319,10 @@ RelType Hexagon::getDynRel(RelType type) const { return R_HEX_NONE; } -TargetInfo *elf::getHexagonTargetInfo() { +TargetInfo *getHexagonTargetInfo() { static Hexagon target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/MSP430.cpp b/lld/ELF/Arch/MSP430.cpp index 90664396c85ea..f03e8181923b1 100644 --- a/lld/ELF/Arch/MSP430.cpp +++ b/lld/ELF/Arch/MSP430.cpp @@ -26,8 +26,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class MSP430 final : public TargetInfo { @@ -87,7 +88,10 @@ void MSP430::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *elf::getMSP430TargetInfo() { +TargetInfo *getMSP430TargetInfo() { static MSP430 target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index b9c66cdff5d0f..881f647b5a4a8 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -18,9 +18,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; 
-using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { namespace { template class MIPS final : public TargetInfo { public: @@ -721,7 +721,7 @@ template bool MIPS::usesOnlyLowPageBits(RelType type) const { } // Return true if the symbol is a PIC function. -template bool elf::isMipsPIC(const Defined *sym) { +template bool isMipsPIC(const Defined *sym) { if (!sym->isFunc()) return false; @@ -739,17 +739,20 @@ template bool elf::isMipsPIC(const Defined *sym) { return file->getObj().getHeader()->e_flags & EF_MIPS_PIC; } -template TargetInfo *elf::getMipsTargetInfo() { +template TargetInfo *getMipsTargetInfo() { static MIPS target; return ⌖ } -template TargetInfo *elf::getMipsTargetInfo(); -template TargetInfo *elf::getMipsTargetInfo(); -template TargetInfo *elf::getMipsTargetInfo(); -template TargetInfo *elf::getMipsTargetInfo(); +template TargetInfo *getMipsTargetInfo(); +template TargetInfo *getMipsTargetInfo(); +template TargetInfo *getMipsTargetInfo(); +template TargetInfo *getMipsTargetInfo(); -template bool elf::isMipsPIC(const Defined *); -template bool elf::isMipsPIC(const Defined *); -template bool elf::isMipsPIC(const Defined *); -template bool elf::isMipsPIC(const Defined *); +template bool isMipsPIC(const Defined *); +template bool isMipsPIC(const Defined *); +template bool isMipsPIC(const Defined *); +template bool isMipsPIC(const Defined *); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 85329c3bef536..923458afae0d4 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -23,8 +23,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { namespace { struct ArchTreeEdge { @@ -294,7 +294,7 @@ static uint32_t getArchFlags(ArrayRef files) { return ret; } -template uint32_t elf::calcMipsEFlags() { +template uint32_t 
calcMipsEFlags() { std::vector v; for (InputFile *f : objectFiles) v.push_back({f, cast>(f)->getObj().getHeader()->e_flags}); @@ -350,8 +350,7 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t elf::getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, - StringRef fileName) { +uint8_t getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, StringRef fileName) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) @@ -367,7 +366,7 @@ template static bool isN32Abi(const InputFile *f) { return false; } -bool elf::isMipsN32Abi(const InputFile *f) { +bool isMipsN32Abi(const InputFile *f) { switch (config->ekind) { case ELF32LEKind: return isN32Abi(f); @@ -382,14 +381,17 @@ bool elf::isMipsN32Abi(const InputFile *f) { } } -bool elf::isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } +bool isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } -bool elf::isMipsR6() { +bool isMipsR6() { uint32_t arch = config->eflags & EF_MIPS_ARCH; return arch == EF_MIPS_ARCH_32R6 || arch == EF_MIPS_ARCH_64R6; } -template uint32_t elf::calcMipsEFlags(); -template uint32_t elf::calcMipsEFlags(); -template uint32_t elf::calcMipsEFlags(); -template uint32_t elf::calcMipsEFlags(); +template uint32_t calcMipsEFlags(); +template uint32_t calcMipsEFlags(); +template uint32_t calcMipsEFlags(); +template uint32_t calcMipsEFlags(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index cf4ad40499268..c4eecb9a29c22 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -16,8 +16,9 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class PPC final : public TargetInfo { @@ -61,7 +62,7 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? 
loc : loc - 2, insn); } -void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { +void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. @@ -435,7 +436,10 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *elf::getPPCTargetInfo() { +TargetInfo *getPPCTargetInfo() { static PPC target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 15ad8db4649ca..905903fa4d668 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -16,8 +16,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { static uint64_t ppc64TocOffset = 0x8000; static uint64_t dynamicThreadPointerOffset = 0x8000; @@ -59,7 +60,7 @@ enum DFormOpcd { ADDI = 14 }; -uint64_t elf::getPPC64TocBase() { +uint64_t getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. 
We always create a // .got when we see a relocation that uses it, so for us the start is always @@ -73,7 +74,7 @@ uint64_t elf::getPPC64TocBase() { return tocVA + ppc64TocOffset; } -unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { +unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { // The offset is encoded into the 3 most significant bits of the st_other // field, with some special values described in section 3.4.1 of the ABI: // 0 --> Zero offset between the GEP and LEP, and the function does NOT use @@ -98,7 +99,7 @@ unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { return 0; } -bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { +bool isPPC64SmallCodeModelTocReloc(RelType type) { // The only small code model relocations that access the .toc section. return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } @@ -153,8 +154,8 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // ld/lwa 3, 0(3) # load the value from the address // // Returns true if the relaxation is performed. 
-bool elf::tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, - uint8_t *bufLoc) { +bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, + uint8_t *bufLoc) { assert(config->tocOptimize); if (rel.addend < 0) return false; @@ -458,7 +459,7 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -unsigned elf::getPPCDFormOp(unsigned secondaryOp) { +unsigned getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: return LBZ; @@ -1093,7 +1094,10 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, return true; } -TargetInfo *elf::getPPC64TargetInfo() { +TargetInfo *getPPC64TargetInfo() { static PPC64 target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 6f16ade571773..e7c0e36e03275 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -14,8 +14,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { @@ -436,7 +437,10 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } } -TargetInfo *elf::getRISCVTargetInfo() { +TargetInfo *getRISCVTargetInfo() { static RISCV target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index 5299206dd919f..a0afdff08a635 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -16,8 +16,9 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class SPARCV9 final : public TargetInfo { @@ -143,7 +144,10 @@ void SPARCV9::writePlt(uint8_t *buf, uint64_t gotEntryAddr, relocateOne(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); } -TargetInfo *elf::getSPARCV9TargetInfo() { 
+TargetInfo *getSPARCV9TargetInfo() { static SPARCV9 target; return ⌖ } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index e1dd231e8e8d4..b27a6e302e78c 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -16,8 +16,9 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class X86 : public TargetInfo { @@ -539,7 +540,7 @@ void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, write32le(buf + 22, -off - 26); } -TargetInfo *elf::getX86TargetInfo() { +TargetInfo *getX86TargetInfo() { if (config->zRetpolineplt) { if (config->isPic) { static RetpolinePic t; @@ -552,3 +553,6 @@ TargetInfo *elf::getX86TargetInfo() { static X86 t; return &t; } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index de67aa5c33dc4..bb8d92fc61b9b 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -18,8 +18,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { class X86_64 : public TargetInfo { @@ -698,4 +699,7 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); } +TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index 459aa3c01b5ce..6f8ef8954af32 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -48,8 +48,9 @@ #include using namespace llvm; -using namespace lld; -using namespace lld::elf; + +namespace lld { +namespace elf { namespace { struct Edge { @@ -264,6 +265,9 @@ DenseMap CallGraphSort::run() { // This first builds a call graph based 
on the profile data then merges sections // according to the C³ huristic. All clusters are then sorted by a density // metric to further improve locality. -DenseMap elf::computeCallGraphProfileOrder() { +DenseMap computeCallGraphProfileOrder() { return CallGraphSort().run(); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 9ef83caf4f70c..a00189a0e3a28 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -22,9 +22,9 @@ using namespace llvm; using namespace llvm::object; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { for (InputSectionBase *sec : obj->getSections()) { if (!sec) @@ -124,7 +124,10 @@ Optional LLDDwarfObj::find(const llvm::DWARFSection &s, return findAux(*sec.sec, pos, sec.sec->template rels()); } -template class elf::LLDDwarfObj; -template class elf::LLDDwarfObj; -template class elf::LLDDwarfObj; -template class elf::LLDDwarfObj; +template class LLDDwarfObj; +template class LLDDwarfObj; +template class LLDDwarfObj; +template class LLDDwarfObj; + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index b2c728880b4a1..fbdf28b1a2029 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -66,17 +66,16 @@ using namespace llvm::object; using namespace llvm::sys; using namespace llvm::support; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { -Configuration *elf::config; -LinkerDriver *elf::driver; +Configuration *config; +LinkerDriver *driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); -bool elf::link(ArrayRef args, bool canExitEarly, - raw_ostream &error) { +bool link(ArrayRef args, bool canExitEarly, raw_ostream &error) { errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorLimitExceededMsg = "too many errors emitted, stopping now (use " @@ -1970,3 
+1969,6 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Write the result to the file. writeResult(); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 5ed37e0171b87..43987cd5d4d4e 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -30,8 +30,8 @@ using namespace llvm; using namespace llvm::sys; using namespace llvm::opt; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { // Create OptTable @@ -143,7 +143,7 @@ opt::InputArgList ELFOptTable::parse(ArrayRef argv) { return args; } -void elf::printHelp() { +void printHelp() { ELFOptTable().PrintHelp( outs(), (config->progName + " [options] file...").str().c_str(), "lld", false /*ShowHidden*/, true /*ShowAllAliases*/); @@ -165,7 +165,7 @@ static std::string rewritePath(StringRef s) { // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. -std::string elf::createResponseFile(const opt::InputArgList &args) { +std::string createResponseFile(const opt::InputArgList &args) { SmallString<0> data; raw_svector_ostream os(data); os << "--chroot .\n"; @@ -216,7 +216,7 @@ static Optional findFile(StringRef path1, const Twine &path2) { return None; } -Optional elf::findFromSearchPaths(StringRef path) { +Optional findFromSearchPaths(StringRef path) { for (StringRef dir : config->searchPaths) if (Optional s = findFile(dir, path)) return s; @@ -225,7 +225,7 @@ Optional elf::findFromSearchPaths(StringRef path) { // This is for -l. We'll look for lib.so or lib.a from // search paths. -Optional elf::searchLibraryBaseName(StringRef name) { +Optional searchLibraryBaseName(StringRef name) { for (StringRef dir : config->searchPaths) { if (!config->isStatic) if (Optional s = findFile(dir, "lib" + name + ".so")) @@ -237,17 +237,20 @@ Optional elf::searchLibraryBaseName(StringRef name) { } // This is for -l. 
-Optional elf::searchLibrary(StringRef name) { - if (name.startswith(":")) - return findFromSearchPaths(name.substr(1)); - return searchLibraryBaseName (name); +Optional searchLibrary(StringRef name) { + if (name.startswith(":")) + return findFromSearchPaths(name.substr(1)); + return searchLibraryBaseName(name); } // If a linker/version script doesn't exist in the current directory, we also // look for the script in the '-L' search paths. This matches the behaviour of // '-T', --version-script=, and linker script INPUT() command in ld.bfd. -Optional elf::searchScript(StringRef name) { +Optional searchScript(StringRef name) { if (fs::exists(name)) return name.str(); return findFromSearchPaths(name); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/EhFrame.cpp b/lld/ELF/EhFrame.cpp index b3245dd01669e..a9c66f29446c8 100644 --- a/lld/ELF/EhFrame.cpp +++ b/lld/ELF/EhFrame.cpp @@ -30,9 +30,8 @@ using namespace llvm::ELF; using namespace llvm::dwarf; using namespace llvm::object; -using namespace lld; -using namespace lld::elf; - +namespace lld { +namespace elf { namespace { class EhReader { public: @@ -57,7 +56,7 @@ class EhReader { }; } -size_t elf::readEhRecordSize(InputSectionBase *s, size_t off) { +size_t readEhRecordSize(InputSectionBase *s, size_t off) { return EhReader(s, s->data().slice(off)).readEhRecordSize(); } @@ -149,7 +148,7 @@ void EhReader::skipAugP() { d = d.slice(size); } -uint8_t elf::getFdeEncoding(EhSectionPiece *p) { +uint8_t getFdeEncoding(EhSectionPiece *p) { return EhReader(p->sec, p->data()).getFdeEncoding(); } @@ -195,3 +194,6 @@ uint8_t EhReader::getFdeEncoding() { } return DW_EH_PE_absptr; } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index c50bbc6a3db87..dce76f79c9b3e 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -88,12 +88,12 @@ #include #include -using namespace lld; -using namespace lld::elf; using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; 
+namespace lld { +namespace elf { namespace { template class ICF { public: @@ -512,9 +512,12 @@ template void ICF::run() { } // ICF entry point function. -template void elf::doIcf() { ICF().run(); } +template void doIcf() { ICF().run(); } -template void elf::doIcf(); -template void elf::doIcf(); -template void elf::doIcf(); -template void elf::doIcf(); +template void doIcf(); +template void doIcf(); +template void doIcf(); +template void doIcf(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 71f28f4a63ced..f8887b111c4b6 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -37,18 +37,31 @@ using namespace llvm::sys; using namespace llvm::sys::fs; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; +namespace lld { +// Returns "", "foo.a(bar.o)" or "baz.o". +std::string toString(const elf::InputFile *f) { + if (!f) + return ""; + if (f->toStringCache.empty()) { + if (f->archiveName.empty()) + f->toStringCache = f->getName(); + else + f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); + } + return f->toStringCache; +} + +namespace elf { bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; -std::vector elf::binaryFiles; -std::vector elf::bitcodeFiles; -std::vector elf::lazyObjFiles; -std::vector elf::objectFiles; -std::vector elf::sharedFiles; +std::vector binaryFiles; +std::vector bitcodeFiles; +std::vector lazyObjFiles; +std::vector objectFiles; +std::vector sharedFiles; -std::unique_ptr elf::tar; +std::unique_ptr tar; static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { unsigned char size; @@ -88,7 +101,7 @@ InputFile::InputFile(Kind k, MemoryBufferRef m) ++nextGroupId; } -Optional elf::readFile(StringRef path) { +Optional readFile(StringRef path) { // The --chroot option changes our virtual root directory. // This is useful when you are dealing with files created by --reproduce. 
if (!config->chroot.empty() && path.startswith("/")) @@ -188,7 +201,7 @@ template static void doParseFile(InputFile *file) { } // Add symbols in File to the symbol table. -void elf::parseFile(InputFile *file) { +void parseFile(InputFile *file) { switch (config->ekind) { case ELF32LEKind: doParseFile(file); @@ -356,20 +369,6 @@ Optional ObjFile::getDILineInfo(InputSectionBase *s, return None; } -// Returns "", "foo.a(bar.o)" or "baz.o". -std::string lld::toString(const InputFile *f) { - if (!f) - return ""; - - if (f->toStringCache.empty()) { - if (f->archiveName.empty()) - f->toStringCache = f->getName(); - else - f->toStringCache = (f->archiveName + "(" + f->getName() + ")").str(); - } - return f->toStringCache; -} - ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { ekind = getELFKind(mb, ""); @@ -1530,8 +1529,8 @@ void BinaryFile::parse() { STV_DEFAULT, STT_OBJECT, data.size(), 0, nullptr}); } -InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) { +InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive) { if (isBitcode(mb)) return make(mb, archiveName, offsetInArchive); @@ -1622,7 +1621,7 @@ template void LazyObjFile::parse() { } } -std::string elf::replaceThinLTOSuffix(StringRef path) { +std::string replaceThinLTOSuffix(StringRef path) { StringRef suffix = config->thinLTOObjectSuffixReplace.first; StringRef repl = config->thinLTOObjectSuffixReplace.second; @@ -1641,12 +1640,15 @@ template void LazyObjFile::parse(); template void LazyObjFile::parse(); template void LazyObjFile::parse(); -template class elf::ObjFile; -template class elf::ObjFile; -template class elf::ObjFile; -template class elf::ObjFile; +template class ObjFile; +template class ObjFile; +template class ObjFile; +template class ObjFile; template void SharedFile::parse(); template void SharedFile::parse(); template void SharedFile::parse(); template void SharedFile::parse(); + +} 
// namespace elf +} // namespace lld diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 5ccc3d402b376..3c777ceacf77a 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -33,15 +33,13 @@ class InputFile; } // namespace llvm namespace lld { -namespace elf { -class InputFile; -class InputSectionBase; -} // Returns "", "foo.a(bar.o)" or "baz.o". std::string toString(const elf::InputFile *f); namespace elf { +class InputFile; +class InputSectionBase; using llvm::object::Archive; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 813423bb5e74f..0c93d2e109590 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -37,16 +37,15 @@ using namespace llvm::support; using namespace llvm::support::endian; using namespace llvm::sys; -using namespace lld; -using namespace lld::elf; - -std::vector elf::inputSections; - +namespace lld { // Returns a string to construct an error message. -std::string lld::toString(const InputSectionBase *sec) { +std::string toString(const elf::InputSectionBase *sec) { return (toString(sec->file) + ":(" + sec->name + ")").str(); } +namespace elf { +std::vector inputSections; + template static ArrayRef getSectionContents(ObjFile &file, const typename ELFT::Shdr &hdr) { @@ -619,7 +618,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // Variant 2. Static TLS blocks, followed by alignment padding are placed // before TP. The alignment padding is added so that (TP - padding - // p_memsz) is congruent to p_vaddr modulo p_align. - elf::PhdrEntry *tls = Out::tlsPhdr; + PhdrEntry *tls = Out::tlsPhdr; switch (config->emachine) { // Variant 1. 
case EM_ARM: @@ -1082,7 +1081,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *buf, end, f->stOther)) continue; if (!getFile()->someNoSplitStack) - error(lld::toString(this) + ": " + f->getName() + + error(toString(this) + ": " + f->getName() + " (with -fsplit-stack) calls " + rel.sym->getName() + " (without -fsplit-stack), but couldn't adjust its prologue"); } @@ -1345,3 +1344,6 @@ template void EhInputSection::split(); template void EhInputSection::split(); template void EhInputSection::split(); template void EhInputSection::split(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 00c87fd5f489d..6da409568c8b1 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -42,8 +42,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { // Creates an empty file to store a list of object files for final // linking of distributed ThinLTO. 
@@ -303,3 +303,6 @@ std::vector BitcodeCompiler::compile() { ret.push_back(createObjectFile(*file)); return ret; } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 70efc22829c64..cebbd89168be5 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -43,10 +43,10 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; -LinkerScript *elf::script; +namespace lld { +namespace elf { +LinkerScript *script; static uint64_t getOutputSectionVA(SectionBase *sec) { OutputSection *os = sec->getOutputSection(); @@ -1202,3 +1202,6 @@ std::vector LinkerScript::getPhdrIndices(OutputSection *cmd) { } return ret; } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index 678f754eaa8b2..4d76e22f37f5f 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -34,9 +34,8 @@ using namespace llvm; using namespace llvm::object; -using namespace lld; -using namespace lld::elf; - +namespace lld { +namespace elf { using SymbolMapTy = DenseMap>; static constexpr char indent8[] = " "; // 8 spaces @@ -139,7 +138,7 @@ static void printEhFrame(raw_ostream &os, const EhFrameSection *sec) { } } -void elf::writeMapFile() { +void writeMapFile() { if (config->mapFile.empty()) return; @@ -228,7 +227,7 @@ static void print(StringRef a, StringRef b) { // // In this case, strlen is defined by libc.so.6 and used by other two // files. 
-void elf::writeCrossReferenceTable() { +void writeCrossReferenceTable() { if (!config->cref) return; @@ -259,3 +258,6 @@ void elf::writeCrossReferenceTable() { print("", toString(file)); } } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index bd29be7a4549e..02ab6d18e5371 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -37,11 +37,11 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; -using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; +namespace endian = llvm::support::endian; +namespace lld { +namespace elf { namespace { template class MarkLive { public: @@ -141,7 +141,7 @@ void MarkLive::scanEhFrameSection(EhInputSection &eh, if (firstRelI == (unsigned)-1) continue; - if (read32(piece.data().data() + 4) == 0) { + if (endian::read32(piece.data().data() + 4) == 0) { // This is a CIE, we only need to worry about the first relocation. It is // known to point to the personality function. resolveReloc(eh, rels[firstRelI], false); @@ -317,7 +317,7 @@ template void MarkLive::moveToMain() { // Before calling this function, Live bits are off for all // input sections. This function make some or all of them on // so that they are emitted to the output file. -template void elf::markLive() { +template void markLive() { // If -gc-sections is not given, no sections are removed. 
if (!config->gcSections) { for (InputSectionBase *sec : inputSections) @@ -379,7 +379,10 @@ template void elf::markLive() { message("removing unused section " + toString(sec)); } -template void elf::markLive(); -template void elf::markLive(); -template void elf::markLive(); -template void elf::markLive(); +template void markLive(); +template void markLive(); +template void markLive(); +template void markLive(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index d6164103867a9..ea7c96eb676a3 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -27,9 +27,8 @@ using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; - +namespace lld { +namespace elf { uint8_t *Out::bufferStart; uint8_t Out::first; PhdrEntry *Out::tlsPhdr; @@ -39,7 +38,7 @@ OutputSection *Out::preinitArray; OutputSection *Out::initArray; OutputSection *Out::finiArray; -std::vector elf::outputSections; +std::vector outputSections; uint32_t OutputSection::getPhdrFlags() const { uint32_t ret = 0; @@ -226,7 +225,7 @@ static void sortByOrder(MutableArrayRef in, in[i] = v[i].second; } -uint64_t elf::getHeaderSize() { +uint64_t getHeaderSize() { if (config->oFormatBinary) return 0; return Out::elfHeader->size + Out::programHeaders->size; @@ -446,7 +445,7 @@ void OutputSection::sortCtorsDtors() { // If an input string is in the form of "foo.N" where N is a number, // return N. Otherwise, returns 65536, which is one greater than the // lowest priority. 
-int elf::getPriority(StringRef s) { +int getPriority(StringRef s) { size_t pos = s.rfind('.'); if (pos == StringRef::npos) return 65536; @@ -456,7 +455,7 @@ int elf::getPriority(StringRef s) { return v; } -std::vector elf::getInputSections(OutputSection *os) { +std::vector getInputSections(OutputSection *os) { std::vector ret; for (BaseCommand *base : os->sectionCommands) if (auto *isd = dyn_cast(base)) @@ -497,3 +496,6 @@ template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 442ffdbb2f847..ab3030d91017b 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -62,9 +62,8 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; - +namespace lld { +namespace elf { static Optional getLinkerScriptLocation(const Symbol &sym) { for (BaseCommand *base : script->sectionCommands) if (auto *cmd = dyn_cast(base)) @@ -823,7 +822,7 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, error(msg); } -template void elf::reportUndefinedSymbols() { +template void reportUndefinedSymbols() { // Find the first "undefined symbol" diagnostic for each diagnostic, and // collect all "referenced from" lines at the first diagnostic. 
DenseMap firstRef; @@ -1405,7 +1404,7 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { }); } -template void elf::scanRelocations(InputSectionBase &s) { +template void scanRelocations(InputSectionBase &s) { if (s.areRelocsRela) scanRelocs(s, s.relas()); else @@ -1832,11 +1831,14 @@ bool ThunkCreator::createThunks(ArrayRef outputSections) { return addressesChanged; } -template void elf::scanRelocations(InputSectionBase &); -template void elf::scanRelocations(InputSectionBase &); -template void elf::scanRelocations(InputSectionBase &); -template void elf::scanRelocations(InputSectionBase &); -template void elf::reportUndefinedSymbols(); -template void elf::reportUndefinedSymbols(); -template void elf::reportUndefinedSymbols(); -template void elf::reportUndefinedSymbols(); +template void scanRelocations(InputSectionBase &); +template void scanRelocations(InputSectionBase &); +template void scanRelocations(InputSectionBase &); +template void scanRelocations(InputSectionBase &); +template void reportUndefinedSymbols(); +template void reportUndefinedSymbols(); +template void reportUndefinedSymbols(); +template void reportUndefinedSymbols(); + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp index 953a3df8a31c6..e0ff56fec3f3d 100644 --- a/lld/ELF/ScriptLexer.cpp +++ b/lld/ELF/ScriptLexer.cpp @@ -36,9 +36,9 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { // Returns a whole line containing the current token. 
StringRef ScriptLexer::getLine() { StringRef s = getCurrentMB().getBuffer(); @@ -298,3 +298,6 @@ MemoryBufferRef ScriptLexer::getCurrentMB() { return mb; llvm_unreachable("getCurrentMB: failed to find a token"); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 9ada28ca13eb3..fd8de3b54bd72 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -37,9 +37,9 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; +namespace lld { +namespace elf { namespace { class ScriptParser final : ScriptLexer { public: @@ -1268,7 +1268,7 @@ Expr ScriptParser::readPrimary() { return [=] { return cmd->size; }; } if (tok == "SIZEOF_HEADERS") - return [=] { return elf::getHeaderSize(); }; + return [=] { return getHeaderSize(); }; // Tok is the dot. if (tok == ".") @@ -1511,18 +1511,19 @@ std::pair ScriptParser::readMemoryAttributes() { return {flags, negFlags}; } -void elf::readLinkerScript(MemoryBufferRef mb) { +void readLinkerScript(MemoryBufferRef mb) { ScriptParser(mb).readLinkerScript(); } -void elf::readVersionScript(MemoryBufferRef mb) { +void readVersionScript(MemoryBufferRef mb) { ScriptParser(mb).readVersionScript(); } -void elf::readDynamicList(MemoryBufferRef mb) { - ScriptParser(mb).readDynamicList(); -} +void readDynamicList(MemoryBufferRef mb) { ScriptParser(mb).readDynamicList(); } -void elf::readDefsym(StringRef name, MemoryBufferRef mb) { +void readDefsym(StringRef name, MemoryBufferRef mb) { ScriptParser(mb).readDefsym(name); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 8470a077e7f00..5f6008ef908b1 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -27,10 +27,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; - -SymbolTable *elf::symtab; 
+namespace lld { +namespace elf { +SymbolTable *symtab; void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // Swap symbols as instructed by -wrap. @@ -265,3 +264,6 @@ void SymbolTable::scanVersionScript() { // --dynamic-list. handleDynamicList(); } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index e34f47d6830e9..c0cba21cfe8d7 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -23,9 +23,20 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; +namespace lld { +// Returns a symbol for an error message. +static std::string demangle(StringRef symName) { + if (elf::config->demangle) + return demangleItanium(symName); + return symName; +} +std::string toString(const elf::Symbol &b) { return demangle(b.getName()); } +std::string toELFString(const Archive::Symbol &b) { + return demangle(b.getName()); +} + +namespace elf { Defined *ElfSym::bss; Defined *ElfSym::etext1; Defined *ElfSym::etext2; @@ -42,19 +53,6 @@ Defined *ElfSym::relaIpltEnd; Defined *ElfSym::riscvGlobalPointer; Defined *ElfSym::tlsModuleBase; -// Returns a symbol for an error message. -static std::string demangle(StringRef symName) { - if (config->demangle) - return demangleItanium(symName); - return symName; -} -namespace lld { -std::string toString(const Symbol &b) { return demangle(b.getName()); } -std::string toELFString(const Archive::Symbol &b) { - return demangle(b.getName()); -} -} // namespace lld - static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { switch (sym.kind()) { case Symbol::DefinedKind: { @@ -298,7 +296,7 @@ bool Symbol::includeInDynsym() const { } // Print out a log message for --trace-symbol. 
-void elf::printTraceSymbol(const Symbol *sym) { +void printTraceSymbol(const Symbol *sym) { std::string s; if (sym->isUndefined()) s = ": reference to "; @@ -314,7 +312,7 @@ void elf::printTraceSymbol(const Symbol *sym) { message(toString(sym->file) + s + sym->getName()); } -void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { +void maybeWarnUnorderableSymbol(const Symbol *sym) { if (!config->warnSymbolOrdering) return; @@ -655,3 +653,6 @@ void Symbol::resolveShared(const SharedSymbol &other) { referenced = true; } } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index 713d8be9e3ccb..d43568fe295c7 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -21,6 +21,13 @@ #include "llvm/Object/ELF.h" namespace lld { +std::string toString(const elf::Symbol &); + +// There are two different ways to convert an Archive::Symbol to a string: +// One for Microsoft name mangling and one for Itanium name mangling. +// Call the functions toCOFFString and toELFString, not just toString. +std::string toELFString(const llvm::object::Archive::Symbol &); + namespace elf { class CommonSymbol; class Defined; @@ -30,16 +37,6 @@ class LazyObject; class SharedSymbol; class Symbol; class Undefined; -} // namespace elf - -std::string toString(const elf::Symbol &); - -// There are two different ways to convert an Archive::Symbol to a string: -// One for Microsoft name mangling and one for Itanium name mangling. -// Call the functions toCOFFString and toELFString, not just toString. -std::string toELFString(const elf::Archive::Symbol &); - -namespace elf { // This is a StringRef-like container that doesn't run strlen(). 
// diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 5d9ebfb532530..ff35bb7bd10ce 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -45,13 +45,12 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; -using namespace lld; -using namespace lld::elf; - using llvm::support::endian::read32le; using llvm::support::endian::write32le; using llvm::support::endian::write64le; +namespace lld { +namespace elf { constexpr size_t MergeNoTailSection::numShards; static uint64_t readUint(uint8_t *buf) { @@ -82,7 +81,7 @@ static ArrayRef getVersion() { // With this feature, you can identify LLD-generated binaries easily // by "readelf --string-dump .comment ". // The returned object is a mergeable string section. -MergeInputSection *elf::createCommentSection() { +MergeInputSection *createCommentSection() { return make(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, getVersion(), ".comment"); } @@ -138,7 +137,7 @@ MipsAbiFlagsSection *MipsAbiFlagsSection::create() { flags.ases |= s->ases; flags.flags1 |= s->flags1; flags.flags2 |= s->flags2; - flags.fp_abi = elf::getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename); + flags.fp_abi = getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename); }; if (create) @@ -252,7 +251,7 @@ MipsReginfoSection *MipsReginfoSection::create() { return make>(reginfo); } -InputSection *elf::createInterpSection() { +InputSection *createInterpSection() { // StringSaver guarantees that the returned string ends with '\0'. 
StringRef s = saver.save(config->dynamicLinker); ArrayRef contents = {(const uint8_t *)s.data(), s.size() + 1}; @@ -261,8 +260,8 @@ InputSection *elf::createInterpSection() { ".interp"); } -Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, - uint64_t size, InputSectionBase §ion) { +Defined *addSyntheticLocal(StringRef name, uint8_t type, uint64_t value, + uint64_t size, InputSectionBase §ion) { auto *s = make(section.file, name, STB_LOCAL, STV_DEFAULT, type, value, size, §ion); if (in.symTab) @@ -1274,7 +1273,7 @@ static uint64_t addPltRelSz() { // Add remaining entries to complete .dynamic contents. template void DynamicSection::finalizeContents() { - elf::Partition &part = getPartition(); + Partition &part = getPartition(); bool isMain = part.name.empty(); for (StringRef s : config->filterList) @@ -2940,7 +2939,7 @@ bool VersionTableSection::isNeeded() const { return getPartition().verDef || getPartition().verNeed->isNeeded(); } -void elf::addVerneed(Symbol *ss) { +void addVerneed(Symbol *ss) { auto &file = cast(*ss->file); if (ss->verdefIndex == VER_NDX_GLOBAL) { ss->versionId = VER_NDX_GLOBAL; @@ -3123,16 +3122,16 @@ void MergeNoTailSection::finalizeContents() { }); } -MergeSyntheticSection *elf::createMergeSynthetic(StringRef name, uint32_t type, - uint64_t flags, - uint32_t alignment) { +MergeSyntheticSection *createMergeSynthetic(StringRef name, uint32_t type, + uint64_t flags, + uint32_t alignment) { bool shouldTailMerge = (flags & SHF_STRINGS) && config->optimize >= 2; if (shouldTailMerge) return make(name, type, flags, alignment); return make(name, type, flags, alignment); } -template void elf::splitSections() { +template void splitSections() { // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). 
parallelForEach(inputSections, [](InputSectionBase *sec) { @@ -3486,7 +3485,7 @@ static uint8_t getAbiVersion() { return 0; } -template void elf::writeEhdr(uint8_t *buf, Partition &part) { +template void writeEhdr(uint8_t *buf, Partition &part) { // For executable segments, the trap instructions are written before writing // the header. Setting Elf header bytes to zero ensures that any unused bytes // in header are zero-cleared, instead of having trap instructions. @@ -3512,7 +3511,7 @@ template void elf::writeEhdr(uint8_t *buf, Partition &part) { } } -template void elf::writePhdrs(uint8_t *buf, Partition &part) { +template void writePhdrs(uint8_t *buf, Partition &part) { // Write the program header table. auto *hBuf = reinterpret_cast(buf); for (PhdrEntry *p : part.phdrs) { @@ -3587,87 +3586,90 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { } } -InStruct elf::in; +InStruct in; -std::vector elf::partitions; -Partition *elf::mainPart; +std::vector partitions; +Partition *mainPart; template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); template GdbIndexSection *GdbIndexSection::create(); -template void elf::splitSections(); -template void elf::splitSections(); -template void elf::splitSections(); -template void elf::splitSections(); +template void splitSections(); +template void splitSections(); +template void splitSections(); +template void splitSections(); template void PltSection::addEntry(Symbol &Sym); template void PltSection::addEntry(Symbol &Sym); template void PltSection::addEntry(Symbol &Sym); template void PltSection::addEntry(Symbol &Sym); -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; -template class elf::MipsAbiFlagsSection; - -template class elf::MipsOptionsSection; -template class elf::MipsOptionsSection; -template class elf::MipsOptionsSection; -template class 
elf::MipsOptionsSection; - -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; -template class elf::MipsReginfoSection; - -template class elf::DynamicSection; -template class elf::DynamicSection; -template class elf::DynamicSection; -template class elf::DynamicSection; - -template class elf::RelocationSection; -template class elf::RelocationSection; -template class elf::RelocationSection; -template class elf::RelocationSection; - -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; -template class elf::AndroidPackedRelocationSection; - -template class elf::RelrSection; -template class elf::RelrSection; -template class elf::RelrSection; -template class elf::RelrSection; - -template class elf::SymbolTableSection; -template class elf::SymbolTableSection; -template class elf::SymbolTableSection; -template class elf::SymbolTableSection; - -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; -template class elf::VersionNeedSection; - -template void elf::writeEhdr(uint8_t *Buf, Partition &Part); -template void elf::writeEhdr(uint8_t *Buf, Partition &Part); -template void elf::writeEhdr(uint8_t *Buf, Partition &Part); -template void elf::writeEhdr(uint8_t *Buf, Partition &Part); - -template void elf::writePhdrs(uint8_t *Buf, Partition &Part); -template void elf::writePhdrs(uint8_t *Buf, Partition &Part); -template void elf::writePhdrs(uint8_t *Buf, Partition &Part); -template void elf::writePhdrs(uint8_t *Buf, Partition &Part); - -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; -template class elf::PartitionElfHeaderSection; - -template class elf::PartitionProgramHeadersSection; -template class elf::PartitionProgramHeadersSection; 
-template class elf::PartitionProgramHeadersSection; -template class elf::PartitionProgramHeadersSection; +template class MipsAbiFlagsSection; +template class MipsAbiFlagsSection; +template class MipsAbiFlagsSection; +template class MipsAbiFlagsSection; + +template class MipsOptionsSection; +template class MipsOptionsSection; +template class MipsOptionsSection; +template class MipsOptionsSection; + +template class MipsReginfoSection; +template class MipsReginfoSection; +template class MipsReginfoSection; +template class MipsReginfoSection; + +template class DynamicSection; +template class DynamicSection; +template class DynamicSection; +template class DynamicSection; + +template class RelocationSection; +template class RelocationSection; +template class RelocationSection; +template class RelocationSection; + +template class AndroidPackedRelocationSection; +template class AndroidPackedRelocationSection; +template class AndroidPackedRelocationSection; +template class AndroidPackedRelocationSection; + +template class RelrSection; +template class RelrSection; +template class RelrSection; +template class RelrSection; + +template class SymbolTableSection; +template class SymbolTableSection; +template class SymbolTableSection; +template class SymbolTableSection; + +template class VersionNeedSection; +template class VersionNeedSection; +template class VersionNeedSection; +template class VersionNeedSection; + +template void writeEhdr(uint8_t *Buf, Partition &Part); +template void writeEhdr(uint8_t *Buf, Partition &Part); +template void writeEhdr(uint8_t *Buf, Partition &Part); +template void writeEhdr(uint8_t *Buf, Partition &Part); + +template void writePhdrs(uint8_t *Buf, Partition &Part); +template void writePhdrs(uint8_t *Buf, Partition &Part); +template void writePhdrs(uint8_t *Buf, Partition &Part); +template void writePhdrs(uint8_t *Buf, Partition &Part); + +template class PartitionElfHeaderSection; +template class PartitionElfHeaderSection; +template class 
PartitionElfHeaderSection; +template class PartitionElfHeaderSection; + +template class PartitionProgramHeadersSection; +template class PartitionProgramHeadersSection; +template class PartitionProgramHeadersSection; +template class PartitionProgramHeadersSection; + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index d07478a5178c3..c79e12ee93823 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -34,19 +34,19 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -using namespace lld; -using namespace lld::elf; -const TargetInfo *elf::target; - -std::string lld::toString(RelType type) { +namespace lld { +std::string toString(elf::RelType type) { StringRef s = getELFRelocationTypeName(elf::config->emachine, type); if (s == "Unknown") return ("Unknown (" + Twine(type) + ")").str(); return s; } -TargetInfo *elf::getTarget() { +namespace elf { +const TargetInfo *target; + +TargetInfo *getTarget() { switch (config->emachine) { case EM_386: case EM_IAMCU: @@ -103,7 +103,7 @@ template static ErrorPlace getErrPlace(const uint8_t *loc) { return {}; } -ErrorPlace elf::getErrorPlace(const uint8_t *loc) { +ErrorPlace getErrorPlace(const uint8_t *loc) { switch (config->ekind) { case ELF32LEKind: return getErrPlace(loc); @@ -179,3 +179,6 @@ uint64_t TargetInfo::getImageBase() const { return *config->imageBase; return config->isPic ? 0 : defaultImageBase; } + +} // namespace elf +} // namespace lld diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 2ce36079db30f..dc0f9254596a0 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -36,9 +36,8 @@ using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; - +namespace lld { +namespace elf { namespace { // The writer writes a SymbolTable result to a file. 
template class Writer { @@ -92,7 +91,7 @@ static bool isSectionPrefix(StringRef prefix, StringRef name) { return name.startswith(prefix) || name == prefix.drop_back(); } -StringRef elf::getOutputSectionName(const InputSectionBase *s) { +StringRef getOutputSectionName(const InputSectionBase *s) { if (config->relocatable) return s->name; @@ -140,7 +139,7 @@ static bool needsInterpSection() { script->needsInterpSection(); } -template void elf::writeResult() { Writer().run(); } +template void writeResult() { Writer().run(); } static void removeEmptyPTLoad(std::vector &phdrs) { llvm::erase_if(phdrs, [&](const PhdrEntry *p) { @@ -153,7 +152,7 @@ static void removeEmptyPTLoad(std::vector &phdrs) { }); } -void elf::copySectionsIntoPartitions() { +void copySectionsIntoPartitions() { std::vector newSections; for (unsigned part = 2; part != partitions.size() + 1; ++part) { for (InputSectionBase *s : inputSections) { @@ -175,7 +174,7 @@ void elf::copySectionsIntoPartitions() { newSections.end()); } -void elf::combineEhSections() { +void combineEhSections() { for (InputSectionBase *&s : inputSections) { // Ignore dead sections and the partition end marker (.part.end), // whose partition number is out of bounds. @@ -216,7 +215,7 @@ static Defined *addAbsolute(StringRef name) { // The linker is expected to define some symbols depending on // the linking result. This function defines such symbols. -void elf::addReservedSymbols() { +void addReservedSymbols() { if (config->emachine == EM_MIPS) { // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer // so that it points to an absolute address which by default is relative @@ -309,7 +308,7 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) { return nullptr; } -template void elf::createSyntheticSections() { +template void createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. 
memset(&Out::first, 0, sizeof(Out)); @@ -2737,12 +2736,15 @@ template void Writer::writeBuildId() { part.buildId->writeBuildId(buildId); } -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); -template void elf::createSyntheticSections(); +template void createSyntheticSections(); +template void createSyntheticSections(); +template void createSyntheticSections(); +template void createSyntheticSections(); + +template void writeResult(); +template void writeResult(); +template void writeResult(); +template void writeResult(); -template void elf::writeResult(); -template void elf::writeResult(); -template void elf::writeResult(); -template void elf::writeResult(); +} // namespace elf +} // namespace lld From 24ec80425acc140e919d6718b303ca13b32b8e36 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Oct 2019 08:52:07 +0000 Subject: [PATCH 090/254] [ELF][MIPS] De-template writeValue. NFC Depends on D68561. 
llvm-svn: 373886 --- lld/ELF/Arch/Mips.cpp | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index 881f647b5a4a8..d8fa306a62052 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -213,7 +213,6 @@ template static uint32_t readShuffle(const uint8_t *loc) { return v; } -template static void writeValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, uint8_t shift) { uint32_t instr = read32(loc); @@ -230,7 +229,7 @@ static void writeShuffleValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, if (E == support::little) std::swap(words[0], words[1]); - writeValue(loc, v, bitsSize, shift); + writeValue(loc, v, bitsSize, shift); if (E == support::little) std::swap(words[0], words[1]); @@ -246,7 +245,6 @@ static void writeMicroRelocation16(uint8_t *loc, uint64_t v, uint8_t bitsSize, } template void MIPS::writePltHeader(uint8_t *buf) const { - const endianness e = ELFT::TargetEndianness; if (isMicroMips()) { uint64_t gotPlt = in.gotPlt->getVA(); uint64_t plt = in.plt->getVA(); @@ -302,16 +300,15 @@ template void MIPS::writePltHeader(uint8_t *buf) const { write32(buf + 28, 0x2718fffe); // subu $24, $24, 2 uint64_t gotPlt = in.gotPlt->getVA(); - writeValue(buf, gotPlt + 0x8000, 16, 16); - writeValue(buf + 4, gotPlt, 16, 0); - writeValue(buf + 8, gotPlt, 16, 0); + writeValue(buf, gotPlt + 0x8000, 16, 16); + writeValue(buf + 4, gotPlt, 16, 0); + writeValue(buf + 8, gotPlt, 16, 0); } template void MIPS::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const { - const endianness e = ELFT::TargetEndianness; if (isMicroMips()) { // Overwrite trap instructions written by Writer::writeTrapInstr. 
memset(buf, 0, pltEntrySize); @@ -341,9 +338,9 @@ void MIPS::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, write32(buf + 4, loadInst); // l[wd] $25, %lo(.got.plt entry)($15) write32(buf + 8, jrInst); // jr $25 / jr.hb $25 write32(buf + 12, addInst); // [d]addiu $24, $15, %lo(.got.plt entry) - writeValue(buf, gotPltEntryAddr + 0x8000, 16, 16); - writeValue(buf + 4, gotPltEntryAddr, 16, 0); - writeValue(buf + 12, gotPltEntryAddr, 16, 0); + writeValue(buf, gotPltEntryAddr + 0x8000, 16, 16); + writeValue(buf + 4, gotPltEntryAddr, 16, 0); + writeValue(buf + 12, gotPltEntryAddr, 16, 0); } template @@ -494,7 +491,7 @@ static uint64_t fixupCrossModeJump(uint8_t *loc, RelType type, uint64_t val) { case R_MIPS_26: { uint32_t inst = read32(loc) >> 26; if (inst == 0x3 || inst == 0x1d) { // JAL or JALX - writeValue(loc, 0x1d << 26, 32, 0); + writeValue(loc, 0x1d << 26, 32, 0); return val; } break; @@ -558,17 +555,17 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write64(loc, val); break; case R_MIPS_26: - writeValue(loc, val, 26, 2); + writeValue(loc, val, 26, 2); break; case R_MIPS_GOT16: // The R_MIPS_GOT16 relocation's value in "relocatable" linking mode // is updated addend (not a GOT index). In that case write high 16 bits // to store a correct addend value. 
if (config->relocatable) { - writeValue(loc, val + 0x8000, 16, 16); + writeValue(loc, val + 0x8000, 16, 16); } else { checkInt(loc, val, 16, type); - writeValue(loc, val, 16, 0); + writeValue(loc, val, 16, 0); } break; case R_MICROMIPS_GOT16: @@ -595,7 +592,7 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_PCLO16: case R_MIPS_TLS_DTPREL_LO16: case R_MIPS_TLS_TPREL_LO16: - writeValue(loc, val, 16, 0); + writeValue(loc, val, 16, 0); break; case R_MICROMIPS_GPREL16: case R_MICROMIPS_TLS_GD: @@ -621,7 +618,7 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_PCHI16: case R_MIPS_TLS_DTPREL_HI16: case R_MIPS_TLS_TPREL_HI16: - writeValue(loc, val + 0x8000, 16, 16); + writeValue(loc, val + 0x8000, 16, 16); break; case R_MICROMIPS_CALL_HI16: case R_MICROMIPS_GOT_HI16: @@ -631,10 +628,10 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeShuffleValue(loc, val + 0x8000, 16, 16); break; case R_MIPS_HIGHER: - writeValue(loc, val + 0x80008000, 16, 32); + writeValue(loc, val + 0x80008000, 16, 32); break; case R_MIPS_HIGHEST: - writeValue(loc, val + 0x800080008000, 16, 48); + writeValue(loc, val + 0x800080008000, 16, 48); break; case R_MIPS_JALR: val -= 4; @@ -657,25 +654,25 @@ void MIPS::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_PC16: checkAlignment(loc, val, 4, type); checkInt(loc, val, 18, type); - writeValue(loc, val, 16, 2); + writeValue(loc, val, 16, 2); break; case R_MIPS_PC19_S2: checkAlignment(loc, val, 4, type); checkInt(loc, val, 21, type); - writeValue(loc, val, 19, 2); + writeValue(loc, val, 19, 2); break; case R_MIPS_PC21_S2: checkAlignment(loc, val, 4, type); checkInt(loc, val, 23, type); - writeValue(loc, val, 21, 2); + writeValue(loc, val, 21, 2); break; case R_MIPS_PC26_S2: checkAlignment(loc, val, 4, type); checkInt(loc, val, 28, type); - writeValue(loc, val, 26, 2); + writeValue(loc, val, 26, 2); break; case R_MIPS_PC32: - 
writeValue(loc, val, 32, 0); + writeValue(loc, val, 32, 0); break; case R_MICROMIPS_26_S1: case R_MICROMIPS_PC26_S1: From 32b47ddb2deba6896aa00405c4239b808a62da09 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 7 Oct 2019 09:30:15 +0000 Subject: [PATCH 091/254] clang-cl: Ignore the new /ZH options These were added to the MS docs in https://github.com/MicrosoftDocs/cpp-docs/commit/85b9b6967e58e485251450f7451673f6fc873e88 and are supposedly available in VS 2019 16.4 (though my 2019 Preview, version 16.4.0-pre.1.0 don't seem to have them.) llvm-svn: 373887 --- clang/include/clang/Driver/CLCompatOptions.td | 3 +++ clang/test/Driver/cl-options.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/clang/include/clang/Driver/CLCompatOptions.td b/clang/include/clang/Driver/CLCompatOptions.td index bb51c3aa11d57..50d4622009c95 100644 --- a/clang/include/clang/Driver/CLCompatOptions.td +++ b/clang/include/clang/Driver/CLCompatOptions.td @@ -401,6 +401,9 @@ def _SLASH_Zc_inline : CLIgnoredFlag<"Zc:inline">; def _SLASH_Zc_rvalueCast : CLIgnoredFlag<"Zc:rvalueCast">; def _SLASH_Zc_ternary : CLIgnoredFlag<"Zc:ternary">; def _SLASH_Zc_wchar_t : CLIgnoredFlag<"Zc:wchar_t">; +def _SLASH_ZH_MD5 : CLIgnoredFlag<"ZH:MD5">; +def _SLASH_ZH_SHA1 : CLIgnoredFlag<"ZH:SHA1">; +def _SLASH_ZH_SHA_256 : CLIgnoredFlag<"ZH:SHA_256">; def _SLASH_Zm : CLIgnoredJoined<"Zm">; def _SLASH_Zo : CLIgnoredFlag<"Zo">; def _SLASH_Zo_ : CLIgnoredFlag<"Zo-">; diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 68f08b0c18c64..d11d46253c9ea 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -377,6 +377,9 @@ // RUN: /Zc:rvalueCast \ // RUN: /Zc:ternary \ // RUN: /Zc:wchar_t \ +// RUN: /ZH:MD5 \ +// RUN: /ZH:SHA1 \ +// RUN: /ZH:SHA_256 \ // RUN: /Zm \ // RUN: /Zo \ // RUN: /Zo- \ From 6942327a8f3ba90b2480f39318901e22269d16e7 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 7 Oct 2019 09:54:53 +0000 Subject: [PATCH 092/254] [IA] 
Recognize hexadecimal escape sequences Summary: Implement support for hexadecimal escape sequences to match how GNU 'as' handles them. I.e., read all hexadecimal characters and truncate to the lower 16 bits. Reviewers: nickdesaulniers Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68483 llvm-svn: 373888 --- llvm/lib/MC/MCParser/AsmParser.cpp | 17 ++++++++++++++++- llvm/test/MC/AsmParser/directive_ascii.s | 5 +++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 381bf96416166..b25959b102d6d 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2914,11 +2914,26 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + // loosely follows Darwin 'as'. ++i; if (i == e) return TokError("unexpected backslash at end of string"); + // Recognize hex sequences similarly to GNU 'as'. + if (Str[i] == 'x' || Str[i] == 'X') { + if (!isHexDigit(Str[i + 1])) + return TokError("invalid hexadecimal escape sequence"); + + // Consume hex characters. GNU 'as' reads all hexadecimal characters and + // then truncates to the lower 16 bits. Seems reasonable. + unsigned Value = 0; + while (isHexDigit(Str[i + 1])) + Value = Value * 16 + hexDigitValue(Str[++i]); + + Data += (unsigned char)(Value & 0xFF); + continue; + } + // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. 
diff --git a/llvm/test/MC/AsmParser/directive_ascii.s b/llvm/test/MC/AsmParser/directive_ascii.s index a7ba7bbd5da13..604f9721bcca9 100644 --- a/llvm/test/MC/AsmParser/directive_ascii.s +++ b/llvm/test/MC/AsmParser/directive_ascii.s @@ -39,3 +39,8 @@ TEST5: # CHECK: .byte 0 TEST6: .string "B", "C" + +# CHECK: TEST7: +# CHECK: .ascii "dk" +TEST7: + .ascii "\x64\Xa6B" From 2fa81d201f4b6238c9cc92fdca5cd77ca7fe2435 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 7 Oct 2019 10:10:31 +0000 Subject: [PATCH 093/254] [clangd] Collect missing macro references. Summary: Semantic highlghting is missing a few macro references. Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68458 llvm-svn: 373889 --- clang-tools-extra/clangd/CollectMacros.h | 26 +++++++++++++++++-- .../unittests/SemanticHighlightingTests.cpp | 14 ++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/CollectMacros.h b/clang-tools-extra/clangd/CollectMacros.h index 21227e1ed570f..619c9f54b58a3 100644 --- a/clang-tools-extra/clangd/CollectMacros.h +++ b/clang-tools-extra/clangd/CollectMacros.h @@ -25,7 +25,8 @@ struct MainFileMacros { std::vector Ranges; }; -/// Collects macro definitions and expansions in the main file. It is used to: +/// Collects macro references (e.g. definitions, expansions) in the main file. 
+/// It is used to: /// - collect macros in the preamble section of the main file (in Preamble.cpp) /// - collect macros after the preamble of the main file (in ParsedAST.cpp) class CollectMainFileMacros : public PPCallbacks { @@ -49,6 +50,27 @@ class CollectMainFileMacros : public PPCallbacks { add(MacroName, MD.getMacroInfo()); } + void MacroUndefined(const clang::Token &MacroName, + const clang::MacroDefinition &MD, + const clang::MacroDirective *Undef) override { + add(MacroName, MD.getMacroInfo()); + } + + void Ifdef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) override { + add(MacroName, MD.getMacroInfo()); + } + + void Ifndef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) override { + add(MacroName, MD.getMacroInfo()); + } + + void Defined(const Token &MacroName, const MacroDefinition &MD, + SourceRange Range) override { + add(MacroName, MD.getMacroInfo()); + } + private: void add(const Token &MacroNameTok, const MacroInfo *MI) { if (!InMainFile) @@ -57,7 +79,7 @@ class CollectMainFileMacros : public PPCallbacks { if (Loc.isMacroID()) return; - if (auto Range = getTokenRange(SM, LangOpts, MacroNameTok.getLocation())) { + if (auto Range = getTokenRange(SM, LangOpts, Loc)) { Out.Names.insert(MacroNameTok.getIdentifierInfo()->getName()); Out.Ranges.push_back(*Range); } diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 06c8d3ae1b6ca..8003e359bbb46 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -475,6 +475,20 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { $Macro[[assert]]($Variable[[x]] != $Variable[[y]]); $Macro[[assert]]($Variable[[x]] != $Function[[f]]()); } + )cpp", + // highlighting all macro references + R"cpp( + #ifndef $Macro[[name]] + #define $Macro[[name]] + #endif + + #define $Macro[[test]] + 
#undef $Macro[[test]] + #ifdef $Macro[[test]] + #endif + + #if defined($Macro[[test]]) + #endif )cpp", R"cpp( struct $Class[[S]] { From 5ce8c391499cd6003a784d1c187d680da579efc0 Mon Sep 17 00:00:00 2001 From: George Rimar Date: Mon, 7 Oct 2019 10:29:38 +0000 Subject: [PATCH 094/254] [llvm-readelf/llvm-objdump] - Improve/refactor the implementation of SHT_LLVM_ADDRSIG section dumping. This patch: * Adds a llvm-readobj/llvm-readelf test file for SHT_LLVM_ADDRSIG sections. (we do not have any) * Enables dumping of SHT_LLVM_ADDRSIG with --all. * Changes the logic to report a warning instead of an error when something goes wrong during dumping (allows to continue dumping SHT_LLVM_ADDRSIG and other sections on error). * Refactors a piece of logic to a new toULEB128Array helper which might be used for GNU-style dumping implementation. Differential revision: https://reviews.llvm.org/D68383 llvm-svn: 373890 --- llvm/test/tools/llvm-readobj/all.test | 1 + llvm/test/tools/llvm-readobj/elf-addrsig.test | 84 +++++++++++++++++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 82 ++++++++++++------ llvm/tools/llvm-readobj/llvm-readobj.cpp | 4 +- 4 files changed, 143 insertions(+), 28 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/elf-addrsig.test diff --git a/llvm/test/tools/llvm-readobj/all.test b/llvm/test/tools/llvm-readobj/all.test index 2fef5b8422775..ac27f38c3a327 100644 --- a/llvm/test/tools/llvm-readobj/all.test +++ b/llvm/test/tools/llvm-readobj/all.test @@ -14,6 +14,7 @@ # ALL: Version symbols { # ALL: SHT_GNU_verdef { # ALL: SHT_GNU_verneed { +# ALL: Addrsig [ # ALL: Notes [ # ALL: StackSizes [ diff --git a/llvm/test/tools/llvm-readobj/elf-addrsig.test b/llvm/test/tools/llvm-readobj/elf-addrsig.test new file mode 100644 index 0000000000000..a0c32ab593659 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/elf-addrsig.test @@ -0,0 +1,84 @@ +## Show that llvm-readobj can dump SHT_LLVM_ADDRSIG sections. 
+ +# RUN: yaml2obj --docnum=1 %s -o %t1.o +# RUN: llvm-readobj --addrsig %t1.o | FileCheck -DFILE=%t1.o %s --check-prefix LLVM +# RUN: not llvm-readelf --addrsig %t1.o 2>&1 | FileCheck -DFILE=%t1.o %s --check-prefix GNU + +# LLVM: Addrsig [ +# LLVM-NEXT: Sym: foo (1) +# LLVM-NEXT: Sym: bar (2) +# LLVM-NEXT: ] + +# GNU: error: '[[FILE]]': --addrsig: not implemented + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .llvm_addrsig + Type: SHT_LLVM_ADDRSIG + Symbols: + - Name: foo + - Name: bar +Symbols: + - Name: foo + - Name: bar + +## Check that llvm-readobj dumps any SHT_LLVM_ADDRSIG section when --all +## is specified for LLVM style, but not for GNU style. +## TODO: Refine the llvm-readelf check when GNU-style dumping is implemented. + +# RUN: llvm-readobj --all %t1.o | FileCheck %s --check-prefix LLVM +# RUN: llvm-readelf --all %t1.o 2>&1 | FileCheck %s --implicit-check-not=warning --implicit-check-not=error + +## Check we report a warning when SHT_LLVM_ADDRSIG is broken (e.g. contains a malformed uleb128). + +# RUN: yaml2obj --docnum=2 %s -o %t2.o +# RUN: llvm-readobj --addrsig %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=MALFORMED + +# MALFORMED: warning: '[[FILE]]': malformed uleb128, extends past end + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .llvm_addrsig + Type: SHT_LLVM_ADDRSIG + Content: "FF" + +## Check we report a warning when SHT_LLVM_ADDRSIG references a symbol that can't be +## dumped (e.g. the index value is larger than the number of symbols in .symtab). 
+ +# RUN: yaml2obj --docnum=3 %s -o %t3.o +# RUN: llvm-readobj --addrsig %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=INVALID-INDEX + +# INVALID-INDEX: Addrsig [ +# INVALID-INDEX-NEXT: Sym: foo (1) +# INVALID-INDEX-EMPTY: +# INVALID-INDEX-NEXT: warning: '[[FILE]]': unable to get symbol from section [index 2]: invalid symbol index (255) +# INVALID-INDEX-NEXT: Sym: (255) +# INVALID-INDEX-NEXT: Sym: bar (2) +# INVALID-INDEX-NEXT: ] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .llvm_addrsig + Type: SHT_LLVM_ADDRSIG + Symbols: + - Index: 1 + - Index: 255 + - Index: 2 +Symbols: + - Name: foo + - Name: bar diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 7a7e17602c4e5..4e9cf213174f4 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -302,7 +302,7 @@ template class ELFDumper : public ObjDumper { void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef &SectionName, unsigned &SectionIndex) const; - std::string getStaticSymbolName(uint32_t Index) const; + Expected getStaticSymbolName(uint32_t Index) const; std::string getDynamicString(uint64_t Value) const; StringRef getSymbolVersionByIndex(StringRef StrTab, uint32_t VersionSymbolIndex, @@ -754,17 +754,22 @@ static std::string maybeDemangle(StringRef Name) { } template -std::string ELFDumper::getStaticSymbolName(uint32_t Index) const { +Expected +ELFDumper::getStaticSymbolName(uint32_t Index) const { const ELFFile *Obj = ObjF->getELFFile(); - StringRef StrTable = unwrapOrError( - ObjF->getFileName(), Obj->getStringTableForSymtab(*DotSymtabSec)); - Elf_Sym_Range Syms = - unwrapOrError(ObjF->getFileName(), Obj->symbols(DotSymtabSec)); - if (Index >= Syms.size()) - reportError(createError("Invalid symbol index"), ObjF->getFileName()); - const Elf_Sym *Sym = &Syms[Index]; - return maybeDemangle( - 
unwrapOrError(ObjF->getFileName(), Sym->getName(StrTable))); + Expected SymOrErr = + Obj->getSymbol(DotSymtabSec, Index); + if (!SymOrErr) + return SymOrErr.takeError(); + + Expected StrTabOrErr = Obj->getStringTableForSymtab(*DotSymtabSec); + if (!StrTabOrErr) + return StrTabOrErr.takeError(); + + Expected NameOrErr = (*SymOrErr)->getName(*StrTabOrErr); + if (!NameOrErr) + return NameOrErr.takeError(); + return maybeDemangle(*NameOrErr); } template @@ -4047,7 +4052,7 @@ void GNUStyle::printCGProfile(const ELFFile *Obj) { template void GNUStyle::printAddrsig(const ELFFile *Obj) { - OS << "GNUStyle::printAddrsig not implemented\n"; + reportError(createError("--addrsig: not implemented"), this->FileName); } static StringRef getGenericNoteTypeName(const uint32_t NT) { @@ -5723,14 +5728,35 @@ void LLVMStyle::printCGProfile(const ELFFile *Obj) { this->dumper()->getDotCGProfileSec())); for (const Elf_CGProfile &CGPE : CGProfile) { DictScope D(W, "CGProfileEntry"); - W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from), - CGPE.cgp_from); - W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to), - CGPE.cgp_to); + W.printNumber( + "From", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_from)), + CGPE.cgp_from); + W.printNumber( + "To", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_to)), + CGPE.cgp_to); W.printNumber("Weight", CGPE.cgp_weight); } } +static Expected> toULEB128Array(ArrayRef Data) { + std::vector Ret; + const uint8_t *Cur = Data.begin(); + const uint8_t *End = Data.end(); + while (Cur != End) { + unsigned Size; + const char *Err; + Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); + if (Err) + return createError(Err); + Cur += Size; + } + return Ret; +} + template void LLVMStyle::printAddrsig(const ELFFile *Obj) { ListScope L(W, "Addrsig"); @@ -5739,18 +5765,20 @@ void LLVMStyle::printAddrsig(const ELFFile *Obj) { ArrayRef Contents = unwrapOrError( 
this->FileName, Obj->getSectionContents(this->dumper()->getDotAddrsigSec())); - const uint8_t *Cur = Contents.begin(); - const uint8_t *End = Contents.end(); - while (Cur != End) { - unsigned Size; - const char *Err; - uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err); - if (Err) - reportError(createError(Err), this->FileName); + Expected> V = toULEB128Array(Contents); + if (!V) { + reportWarning(V.takeError(), this->FileName); + return; + } - W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex), - SymIndex); - Cur += Size; + for (uint64_t Sym : *V) { + Expected NameOrErr = this->dumper()->getStaticSymbolName(Sym); + if (NameOrErr) { + W.printNumber("Sym", *NameOrErr, Sym); + continue; + } + reportWarning(NameOrErr.takeError(), this->FileName); + W.printNumber("Sym", "", Sym); } } diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index b85df6bae05d7..4db13897879d1 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -691,8 +691,10 @@ int main(int argc, const char *argv[]) { opts::UnwindInfo = true; opts::SectionGroups = true; opts::HashHistogram = true; - if (opts::Output == opts::LLVM) + if (opts::Output == opts::LLVM) { + opts::Addrsig = true; opts::PrintStackSizes = true; + } } if (opts::Headers) { From 2c082b48274fcba62bf9b3acb63075aedcc7a976 Mon Sep 17 00:00:00 2001 From: Konrad Kleine Date: Mon, 7 Oct 2019 10:32:16 +0000 Subject: [PATCH 095/254] [lldb][ELF] Read symbols from .gnu_debugdata sect. Summary: If the .symtab section is stripped from the binary it might be that there's a .gnu_debugdata section which contains a smaller .symtab in order to provide enough information to create a backtrace with function names or to set and hit a breakpoint on a function name. This change looks for a .gnu_debugdata section in the ELF object file. The .gnu_debugdata section contains a xz-compressed ELF file with a .symtab section inside. 
Symbols from that compressed .symtab section are merged with the main object file's .dynsym symbols (if any). In addition we always load the .dynsym even if there's a .symtab section. For example, the Fedora and RHEL operating systems strip their binaries but keep a .gnu_debugdata section. While gdb already can read this section, LLDB until this patch couldn't. To test this patch on a Fedora or RHEL operating system, try to set a breakpoint on the "help" symbol in the "zip" binary. Before this patch, only GDB can set this breakpoint; now LLDB also can do so without installing extra debug symbols: lldb /usr/bin/zip -b -o "b help" -o "r" -o "bt" -- -h The above line runs LLDB in batch mode and on the "/usr/bin/zip -h" target: (lldb) target create "/usr/bin/zip" Current executable set to '/usr/bin/zip' (x86_64). (lldb) settings set -- target.run-args "-h" Before the program starts, we set a breakpoint on the "help" symbol: (lldb) b help Breakpoint 1: where = zip`help, address = 0x00000000004093b0 Once the program is run and has hit the breakpoint we ask for a backtrace: (lldb) r Process 10073 stopped * thread #1, name = 'zip', stop reason = breakpoint 1.1 frame #0: 0x00000000004093b0 zip`help zip`help: -> 0x4093b0 <+0>: pushq %r12 0x4093b2 <+2>: movq 0x2af5f(%rip), %rsi ; + 4056 0x4093b9 <+9>: movl $0x1, %edi 0x4093be <+14>: xorl %eax, %eax Process 10073 launched: '/usr/bin/zip' (x86_64) (lldb) bt * thread #1, name = 'zip', stop reason = breakpoint 1.1 * frame #0: 0x00000000004093b0 zip`help frame #1: 0x0000000000403970 zip`main + 3248 frame #2: 0x00007ffff7d8bf33 libc.so.6`__libc_start_main + 243 frame #3: 0x0000000000408cee zip`_start + 46 In order to support the .gnu_debugdata section, one has to have LZMA development headers installed. The CMake section, that controls this part looks for the LZMA headers and enables .gnu_debugdata support by default if they are found; otherwise or if explicitly requested, the minidebuginfo support is disabled. 
GDB supports the "mini debuginfo" section .gnu_debugdata since v7.6 (2013). Reviewers: espindola, labath, jankratochvil, alexshap Reviewed By: labath Subscribers: rnkovacs, wuzish, shafik, emaste, mgorny, arichardson, hiraditya, MaskRay, lldb-commits Tags: #lldb, #llvm Differential Revision: https://reviews.llvm.org/D66791 llvm-svn: 373891 --- lldb/cmake/modules/LLDBConfig.cmake | 8 + lldb/include/lldb/Host/Config.h.cmake | 2 + lldb/include/lldb/Host/LZMA.h | 34 ++++ lldb/lit/CMakeLists.txt | 2 + .../Modules/ELF/Inputs/minidebuginfo-main.c | 12 ++ .../Modules/ELF/minidebuginfo-corrupt-xz.yaml | 29 ++++ .../ELF/minidebuginfo-find-symbols.yaml | 26 ++++ .../Modules/ELF/minidebuginfo-no-lzma.yaml | 29 ++++ .../minidebuginfo-set-and-hit-breakpoint.test | 86 +++++++++++ lldb/lit/lit.cfg.py | 7 + lldb/lit/lit.site.cfg.py.in | 1 + lldb/source/Host/CMakeLists.txt | 4 + lldb/source/Host/common/LZMA.cpp | 146 ++++++++++++++++++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 91 ++++++++++- .../Plugins/ObjectFile/ELF/ObjectFileELF.h | 12 ++ 15 files changed, 481 insertions(+), 8 deletions(-) create mode 100644 lldb/include/lldb/Host/LZMA.h create mode 100644 lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c create mode 100644 lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml create mode 100644 lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml create mode 100644 lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml create mode 100644 lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test create mode 100644 lldb/source/Host/common/LZMA.cpp diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index e6e78a831825d..e409d737d7608 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -1,5 +1,6 @@ include(CheckCXXSymbolExists) include(CheckTypeSize) +include(CMakeDependentOption) set(LLDB_PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(LLDB_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/source") @@ -384,6 +385,13 
@@ endif() set(LLDB_VERSION "${LLDB_VERSION_MAJOR}.${LLDB_VERSION_MINOR}.${LLDB_VERSION_PATCH}${LLDB_VERSION_SUFFIX}") message(STATUS "LLDB version: ${LLDB_VERSION}") +find_package(LibLZMA) +cmake_dependent_option(LLDB_ENABLE_LZMA "Support LZMA compression" ON "LIBLZMA_FOUND" OFF) +if (LLDB_ENABLE_LZMA) + include_directories(${LIBLZMA_INCLUDE_DIRS}) +endif() +llvm_canonicalize_cmake_booleans(LLDB_ENABLE_LZMA) + include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake index 7f152437fe528..662c07668d141 100644 --- a/lldb/include/lldb/Host/Config.h.cmake +++ b/lldb/include/lldb/Host/Config.h.cmake @@ -35,4 +35,6 @@ #cmakedefine HAVE_LIBCOMPRESSION #endif +#cmakedefine01 LLDB_ENABLE_LZMA + #endif // #ifndef LLDB_HOST_CONFIG_H diff --git a/lldb/include/lldb/Host/LZMA.h b/lldb/include/lldb/Host/LZMA.h new file mode 100644 index 0000000000000..c741cc3bbde15 --- /dev/null +++ b/lldb/include/lldb/Host/LZMA.h @@ -0,0 +1,34 @@ +//===-- LZMA.h --------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_Host_LZMA_h_ +#define liblldb_Host_LZMA_h_ + +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { +class Error; +} // End of namespace llvm + +namespace lldb_private { + +namespace lzma { + +bool isAvailable(); + +llvm::Expected +getUncompressedSize(llvm::ArrayRef InputBuffer); + +llvm::Error uncompress(llvm::ArrayRef InputBuffer, + llvm::SmallVectorImpl &Uncompressed); + +} // End of namespace lzma + +} // End of namespace lldb_private + +#endif // liblldb_Host_LZMA_h_ diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt index bb4314427e836..fbd64afbb1c92 100644 --- a/lldb/lit/CMakeLists.txt +++ b/lldb/lit/CMakeLists.txt @@ -56,9 +56,11 @@ list(APPEND LLDB_TEST_DEPS lli llvm-config llvm-dwarfdump + llvm-nm llvm-mc llvm-objcopy llvm-readobj + llvm-strip ) if(TARGET lld) diff --git a/lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c b/lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c new file mode 100644 index 0000000000000..8f11dafece689 --- /dev/null +++ b/lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c @@ -0,0 +1,12 @@ +// This function will be embedded within the .symtab section of the +// .gnu_debugdata section. +int multiplyByFour(int num) { return num * 4; } + +// This function will be embedded within the .dynsym section of the main binary. 
+int multiplyByThree(int num) { return num * 3; } + +int main(int argc, char *argv[]) { + int x = multiplyByThree(argc); + int y = multiplyByFour(x); + return y; +} diff --git a/lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml b/lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml new file mode 100644 index 0000000000000..cec34b9c62332 --- /dev/null +++ b/lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml @@ -0,0 +1,29 @@ +# REQUIRES: lzma + +# This test checks that an error occurs when a corrupted +# .gnu_debugdata section is trying to be xz uncompressed. + +# RUN: yaml2obj %s > %t.obj + +# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section +# when there's none in YAML, remove the following line: + +# RUN: llvm-objcopy --remove-section=.symtab %t.obj + +# RUN: %lldb -b -o 'image dump symtab' %t.obj 2>&1 | FileCheck %s + +# CHECK: warning: (x86_64) {{.*}}.obj An error occurred while decompression the section .gnu_debugdata: lzma_stream_buffer_decode()=lzma error: LZMA_DATA_ERROR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x00000000004004C0 +Sections: + - Name: .gnu_debugdata + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: 
FD377A585A000004E6D6B4460200210116000000742FE5A3E0180F05BA5D003F914584683D89A6DA8ACC93E24ED90802EC1FE2A7102958F4A42B6A7134F23922F6F35F529E133A8B5588025CFAC876C68510A157DBBCF8CA75E9854DED10FDD5CE0CDC136F6459B13B9847AEF79E9B1C7CD70EF4F3AF709F5DA0C1F40780154D72120A6A62A3F1A216E20DC597CE55BB23B48785957321A15FEE48808C1428B925DBC8022541CC594BD0AF2B51C6BE2854C81611017704DF6E509D21013B80BEC27D8919ACD3157E89353A08F4C86781ED708E89AB322D010F0F1605DAD9B9CE2B13C387769C83F5F85C647FD9C551E0E9C7D4A5CBE297970E486CB94AC283F98A7C6412A57F9C37952327549EEC4634D2CFA55B0F99923A14992D4293E0D87CEEF7FB6160C45928DE25074EEBF5329B5579AF01DB23DF22CBD48C8037B68FFFBE5CEA6CD26A936DD07D9B2E6006B7C6E5CC751072185EFE995D3F3C8DACF9039D4BEFB1F376B491568F6F00DB50FF477F36B90413E4FA30AE7C561A1249FD45FDFF884F70247FC21E57195A764151D8E341267E724D856C512BD243CDB33AB313758443877B2CB58F7F8F0461DE9766647F333A3531BDC4A26E9537EB314708D31212FCF4C21E9CB139F4DBFD21BB16A126C35E2BB3F7E30BF5A54961CECD4DD4D91A3757356F618754B21533C34F2BD97D70A02B1F338588BDBA9CDF5FC9FBE973E550194F07EC7A1E8E3C005FD60F8853223427628987E82E701CA7E2FDFA1B0ED564C37D115A72C3EC01E29C85C3630D8A385C4AE12F4F75F9F0BC12F2698345DD62A1F546A5953AF5CF3C0F22C7DA510F6739EB8CDB0E8A5A3BC13CFC31C1875C313908EFF23678869B76A6E1C10FE699E43BFFDE8F0752ED994A4A84BC0AD9D7381131D457C4917C4F6656F5C95D3221A79166C802D5F5A7C68554E54C42CA535465D224C7B641CF3417C3EAFD03CE5709BEA33DC7C9155CAC9D3C8033AF7CDA622020606A7C139D77FF85BC19323BF956C9C4662F60079BC7FE5F67B46211716A1A6CE4AB8AAB307D6444310CBC101071703EECC0B4622D91D705F5DA2932DA8BCEDA8E1CB0CDB20AAD652B8F86A521D3421287F1C175AE3BE6458AE6F8F3FB6FB7ED97B616B580D791E5FE0B74973F8604F419039B5B9D9A14397EE509F2B33AE404FF96DD0551472C5302E67910F0794B15CFE837351C6AF89B2FE88488B557BE8ACFFA331FB7AD553D35CAEB7D8BCEFB6CFF4A58E91355FE931408CF4CAFA9B97518B9E5C02078F64CE81279801B090348213DCAA7D12DC098BFF58C5A3202EFC38F64AD894379747B54AB5A9843F82E5FF1F394C8B78344A8F1655DDEF8D5FE09EBB3E703853ABD716743507000696FB6B35216B088E499F53880375521442ED45DCD
D1B31AAEBDAD3C7DA958593425206C4B2A0BC6CADE3B0B1598499E08016E84F33E3EB9D7B03B9C9DFA91B8CE5C74DEF2BC97FEE9982B0AEC16C75EEB7AE9A858A9C37F6C12B040C68A49111DCF0F3A4780F3879E93D904676BE908FDC66373D34AA715A39EFBC2795C6C8F058CA24392FB2591AD06ACD6AED8746F926886180C2B007ED58C9884A8BEF6CCA1F549F5C4FB411A3FF78770D1147363AC80B98B5A8FDB3DEC4E61709F66A622BDA835B1FD67B7C7CB166ABB163FB7C5204AE200C71C6A18B532C407647323B8F2FAC7ECB68C250877FC8DD5FE05B2B39E66F687EBB6EEFB7D5209E22F451B76F57D90BB6641DFFDE1A1821C4D783E4756F3CEE7F63B9BA284F8E114B0D9A086D83233BED4A8F5B60933DC16AF4DDE19C9FC59BCC1646343ECE7007B1C4DC65C4A939CDD47F6ED8855913183149BECE66D8FE7793AE607EB8E28513749B9548252764110D3B58D1D8B348DB18F7F24F8CA0C7D9CB515D90F7F1848FF58472B2EF52EBAB123AFC7F87890CE9FC55B31160014294A9B7F81638A27335E29E15A10B1068D5E049B1C239814DBBCC1BB30E11EEBAD5ACF8FB1B986C4F48D73FEA6129D9708A0B5AC435402BEC8C79C71DB94394811B9A604141A125A4669F9A139A0264E93E822117BE8E0D93A1487C51214E9FBF5763A3FBE9DA700B9C9B435472AF9F0B4446B000000003239307DD8B645100001D60B90300000CA1EC9E9B1C467FB020000000004595A +... 
diff --git a/lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml b/lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml new file mode 100644 index 0000000000000..230ce8bb1c338 --- /dev/null +++ b/lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml @@ -0,0 +1,26 @@ +# REQUIRES: lzma + +# RUN: yaml2obj %s > %t.obj + +# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section +# when there's none in YAML, remove the following line: + +# RUN: llvm-objcopy --remove-section=.symtab %t.obj + +# RUN: %lldb -b -o 'image dump symtab' %t.obj | FileCheck %s + +# CHECK: [ 0] 1 X Code 0x00000000004005b0 0x000000000000000f 0x00000012 multiplyByFour + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x00000000004004C0 +Sections: + - Name: .gnu_debugdata + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: FD377A585A000004E6D6B4460200210116000000742FE5A3E0180F05BA5D003F914584683D89A6DA8ACC93E24ED90802EC1FE2A7102958F4A42B6A7134F23922F6F35F529E133A8B5588025CFAC876C68510A157DBBCF8CA75E9854DED10FDD5CE0CDC136F6459B13B9847AEF79E9B1C7CD70EF4F3AF709F5DA0C1F40780154D72120A6A62A3F1A216E20DC597CE55BB23B48785957321A15FEE48808C1428B925DBC8022541CC594BD0AF2B51C6BE2854C81611017704DF6E509D21013B80BEC27D8919ACD3157E89353A08F4C86781ED708E89AB322D010F0F1605DAD9B9CE2B13C387769C83F5F85C647FD9C551E0E9C7D4A5CBE297970E486CB94AC283F98A7C6412A57F9C37952327549EEC4634D2CFA55B0F99923A14992D4293E0D87CEEF7FB6160C45928DE25074EEBF5329B5579AF01DB23DF22CBD48C8037B68FFFBE5CEA6CD26A936DD07D9B2E6006B7C6E5CC751072185EFE995D3F3C8DACF9039D4BEFB1F376B491568F6F00DB50FF477F36B90413E4FA30AE7C561A1249FD45FDFF884F70247FC21E57195A764151D8E341267E724D856C512BD243CDB33AB313758443877B2CB58F7F8F0461DE9766647F333A3531BDC4A26E9537EB314708D31212FCF4C21E9CB139F4DBFD21BB16A126C35E2BB3F7E30BF5A54961CECD4DD4D91A3757356F618754B21533C34F2BD97D70A02B1F338588BDBA9CDF5FC9FBE973E550194F07EC7A1E8E3C005FD60F8853223427628987E82E701CA7E2FDFA1B0ED564C37D115A72C3EC01E29
C85C3630D8A385C4AE12F4F75F9F0BC12F2698345DD62A1F546A5953AF5CF3C0F22C7DA510F6739EB8CDB0E8A5A3BC13CFC31C1875C313908EFF23678869B76A6E1C10FE699E43BFFDE8F0752ED994A4A84BC0AD9D7381131D457C4917C4F6656F5C95D3221A79166C802D5F5A7C68554E54C42CA535465D224C7B641CF3417C3EAFD03CE5709BEA33DC7C9155CAC9D3C8033AF7CDA622020606A7C139D77FF85BC19323BF956C9C4662F60079BC7FE5F67B46211716A1A6CE4AB8AAB307D6444310CBC101071703EECC0B4622D91D705F5DA2932DA8BCEDA8E1CB0CDB20AAD652B8F86A521D3421287F1C175AE3BE6458AE6F8F3FB6FB7ED97B616B580D791E5FE0B74973F8604F419039B5B9D9A14397EE509F2B33AE404FF96DD0551472C5302E67910F0794B15CFE837351C6AF89B2FE88488B557BE8ACFFA331FB7AD553D35CAEB7D8BCEFB6CFF4A58E91355FE931408CF4CAFA9B97518B9E5C02078F64CE81279801B090348213DCAA7D12DC098BFF58C5A3202EFC38F64AD894379747B54AB5A9843F82E5FF1F394C8B783C3A8F1655DDEF8D5FE09EBB3E703853ABD716743507000696FB6B35216B088E499F53880375521442ED45DCDD1B31AAEBDAD3C7DA958593425206C4B2A0BC6CADE3B0B1598499E08016E84F33E3EB9D7B03B9C9DFA91B8CE5C74DEF2BC97FEE9982B0AEC16C75EEB7AE9A858A9C37F6C12B040C68A49111DCF0F3A4780F3879E93D904676BE908FDC66373D34AA715A39EFBC2795C6C8F058CA24392FB2591AD06ACD6AED8746F926886180C2B007ED58C9884A8BEF6CCA1F549F5C4FB411A3FF78770D1147363AC80B98B5A8FDB3DEC4E61709F66A622BDA835B1FD67B7C7CB166ABB163FB7C5204AE200C71C6A18B532C407647323B8F2FAC7ECB68C250877FC8DD5FE05B2B39E66F687EBB6EEFB7D5209E22F451B76F57D90BB6641DFFDE1A1821C4D783E4756F3CEE7F63B9BA284F8E114B0D9A086D83233BED4A8F5B60933DC16AF4DDE19C9FC59BCC1646343ECE7007B1C4DC65C4A939CDD47F6ED8855913183149BECE66D8FE7793AE607EB8E28513749B9548252764110D3B58D1D8B348DB18F7F24F8CA0C7D9CB515D90F7F1848FF58472B2EF52EBAB123AFC7F87890CE9FC55B31160014294A9B7F81638A27335E29E15A10B1068D5E049B1C239814DBBCC1BB30E11EEBAD5ACF8FB1B986C4F48D73FEA6129D9708A0B5AC435402BEC8C79C71DB94394811B9A604141A125A4669F9A139A0264E93E822117BE8E0D93A1487C51214E9FBF5763A3FBE9DA700B9C9B435472AF9F0B4446B000000003239307DD8B645100001D60B90300000CA1EC9E9B1C467FB020000000004595A +... 
diff --git a/lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml b/lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml new file mode 100644 index 0000000000000..a127109e991ab --- /dev/null +++ b/lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml @@ -0,0 +1,29 @@ +# REQUIRES: !lzma + +# This test checks that a warning is printed when we're trying +# to decompress a .gnu_debugdata section when no LZMA support was compiled in. + +# RUN: yaml2obj %s > %t.obj + +# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section +# when there's none in YAML, remove the following line: + +# RUN: llvm-objcopy --remove-section=.symtab %t.obj + +# RUN: %lldb -b -o 'image dump symtab' %t.obj 2>&1 | FileCheck %s + +# CHECK: warning: (x86_64) {{.*}}.obj No LZMA support found for reading .gnu_debugdata section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x00000000004004C0 +Sections: + - Name: .gnu_debugdata + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: 
FD377A585A000004E6D6B4460200210116000000742FE5A3E0180F05BA5D003F914584683D89A6DA8ACC93E24ED90802EC1FE2A7102958F4A42B6A7134F23922F6F35F529E133A8B5588025CFAC876C68510A157DBBCF8CA75E9854DED10FDD5CE0CDC136F6459B13B9847AEF79E9B1C7CD70EF4F3AF709F5DA0C1F40780154D72120A6A62A3F1A216E20DC597CE55BB23B48785957321A15FEE48808C1428B925DBC8022541CC594BD0AF2B51C6BE2854C81611017704DF6E509D21013B80BEC27D8919ACD3157E89353A08F4C86781ED708E89AB322D010F0F1605DAD9B9CE2B13C387769C83F5F85C647FD9C551E0E9C7D4A5CBE297970E486CB94AC283F98A7C6412A57F9C37952327549EEC4634D2CFA55B0F99923A14992D4293E0D87CEEF7FB6160C45928DE25074EEBF5329B5579AF01DB23DF22CBD48C8037B68FFFBE5CEA6CD26A936DD07D9B2E6006B7C6E5CC751072185EFE995D3F3C8DACF9039D4BEFB1F376B491568F6F00DB50FF477F36B90413E4FA30AE7C561A1249FD45FDFF884F70247FC21E57195A764151D8E341267E724D856C512BD243CDB33AB313758443877B2CB58F7F8F0461DE9766647F333A3531BDC4A26E9537EB314708D31212FCF4C21E9CB139F4DBFD21BB16A126C35E2BB3F7E30BF5A54961CECD4DD4D91A3757356F618754B21533C34F2BD97D70A02B1F338588BDBA9CDF5FC9FBE973E550194F07EC7A1E8E3C005FD60F8853223427628987E82E701CA7E2FDFA1B0ED564C37D115A72C3EC01E29C85C3630D8A385C4AE12F4F75F9F0BC12F2698345DD62A1F546A5953AF5CF3C0F22C7DA510F6739EB8CDB0E8A5A3BC13CFC31C1875C313908EFF23678869B76A6E1C10FE699E43BFFDE8F0752ED994A4A84BC0AD9D7381131D457C4917C4F6656F5C95D3221A79166C802D5F5A7C68554E54C42CA535465D224C7B641CF3417C3EAFD03CE5709BEA33DC7C9155CAC9D3C8033AF7CDA622020606A7C139D77FF85BC19323BF956C9C4662F60079BC7FE5F67B46211716A1A6CE4AB8AAB307D6444310CBC101071703EECC0B4622D91D705F5DA2932DA8BCEDA8E1CB0CDB20AAD652B8F86A521D3421287F1C175AE3BE6458AE6F8F3FB6FB7ED97B616B580D791E5FE0B74973F8604F419039B5B9D9A14397EE509F2B33AE404FF96DD0551472C5302E67910F0794B15CFE837351C6AF89B2FE88488B557BE8ACFFA331FB7AD553D35CAEB7D8BCEFB6CFF4A58E91355FE931408CF4CAFA9B97518B9E5C02078F64CE81279801B090348213DCAA7D12DC098BFF58C5A3202EFC38F64AD894379747B54AB5A9843F82E5FF1F394C8B783C3A8F1655DDEF8D5FE09EBB3E703853ABD716743507000696FB6B35216B088E499F53880375521442ED45DCD
D1B31AAEBDAD3C7DA958593425206C4B2A0BC6CADE3B0B1598499E08016E84F33E3EB9D7B03B9C9DFA91B8CE5C74DEF2BC97FEE9982B0AEC16C75EEB7AE9A858A9C37F6C12B040C68A49111DCF0F3A4780F3879E93D904676BE908FDC66373D34AA715A39EFBC2795C6C8F058CA24392FB2591AD06ACD6AED8746F926886180C2B007ED58C9884A8BEF6CCA1F549F5C4FB411A3FF78770D1147363AC80B98B5A8FDB3DEC4E61709F66A622BDA835B1FD67B7C7CB166ABB163FB7C5204AE200C71C6A18B532C407647323B8F2FAC7ECB68C250877FC8DD5FE05B2B39E66F687EBB6EEFB7D5209E22F451B76F57D90BB6641DFFDE1A1821C4D783E4756F3CEE7F63B9BA284F8E114B0D9A086D83233BED4A8F5B60933DC16AF4DDE19C9FC59BCC1646343ECE7007B1C4DC65C4A939CDD47F6ED8855913183149BECE66D8FE7793AE607EB8E28513749B9548252764110D3B58D1D8B348DB18F7F24F8CA0C7D9CB515D90F7F1848FF58472B2EF52EBAB123AFC7F87890CE9FC55B31160014294A9B7F81638A27335E29E15A10B1068D5E049B1C239814DBBCC1BB30E11EEBAD5ACF8FB1B986C4F48D73FEA6129D9708A0B5AC435402BEC8C79C71DB94394811B9A604141A125A4669F9A139A0264E93E822117BE8E0D93A1487C51214E9FBF5763A3FBE9DA700B9C9B435472AF9F0B4446B000000003239307DD8B645100001D60B90300000CA1EC9E9B1C467FB020000000004595A +... diff --git a/lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test b/lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test new file mode 100644 index 0000000000000..33188d2b4b749 --- /dev/null +++ b/lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test @@ -0,0 +1,86 @@ +# REQUIRES: system-linux, lzma, xz + +# We want to keep the symbol "multiplyByThree" in the .dynamic section and not +# have it put the default .symtab section. +# RUN: echo "{multiplyByThree;};" > %T/dynmic-symbols.txt +# RUN: %clang -Wl,--dynamic-list=%T/dynmic-symbols.txt -g -o %t.binary %p/Inputs/minidebuginfo-main.c + +# The following section is adapted from GDB's official documentation: +# http://sourceware.org/gdb/current/onlinedocs/gdb/MiniDebugInfo.html#MiniDebugInfo + +# Extract the dynamic symbols from the main binary, there is no need +# to also have these in the normal symbol table. 
+ +# IGNORE: llvm-nm -D %t.binary --format=posix --defined-only | awk '{ print $1 }' | sort > %t.dynsyms + +# Extract all the text (i.e. function) symbols from the debuginfo. +# (Note that we actually also accept "D" symbols, for the benefit +# of platforms like PowerPC64 that use function descriptors.) + +# IGNORE: llvm-nm %t.binary --format=posix --defined-only | awk '{ if ($2 == "T" || $2 == "t" || $2 == "D") print $1 }' | sort > %t.funcsyms + +# Keep all the function symbols not already in the dynamic symbol +# table. + +# IGNORE: comm -13 %t.dynsyms %t.funcsyms > %t.keep_symbols +# The result of the preceding command can be preprocessed in %p/Inputs/minidebuginfo.keep_symbols +# because we know what symbol to keep. +# RUN: echo "multiplyByFour" > %p/Inputs/minidebuginfo.keep_symbols + +# Separate full debug info into debug binary. + +# RUN: llvm-objcopy --only-keep-debug %t.binary %t.debug + +# Copy the full debuginfo, keeping only a minimal set of symbols and +# removing some unnecessary sections. + +# RUN: llvm-objcopy -S --remove-section .gdb_index --remove-section .comment --keep-symbols=%p/Inputs/minidebuginfo.keep_symbols %t.debug %t.mini_debuginfo + +# This command is not from the GDB manual but it slims down embedded minidebug +# info. On top of that, it ensures that we only have the multiplyByThree symbol +# in the .dynsym section of the main binary. +# RUN: llvm-objcopy --remove-section=.rela.dyn --remove-section=.gnu.version --remove-section=.gnu.hash --remove-section=.dynsym %t.mini_debuginfo + +# Drop the full debug info from the original binary. + +# RUN: llvm-strip --strip-all -R .comment %t.binary + +# Inject the compressed data into the .gnu_debugdata section of the +# original binary. 
+ +# RUN: xz --force --keep %t.mini_debuginfo + +# RUN: llvm-objcopy --add-section .gnu_debugdata=%t.mini_debuginfo.xz %t.binary + +# Now run the binary and see that we can set and hit a breakpoint +# from within the .dynsym section (multiplyByThree) and one from +# the .symtab section embedded in the .gnu_debugdata section (multiplyByFour). + +# RUN: %lldb -b -o 'b multiplyByThree' -o 'b multiplyByFour' -o 'run' -o 'continue' -o 'breakpoint list -v' %t.binary | FileCheck %s + +# CHECK: (lldb) b multiplyByThree +# CHECK-NEXT: Breakpoint 1: where = minidebuginfo-set-and-hit-breakpoint.test.tmp.binary`multiplyByThree, address = 0x{{.*}} + +# CHECK: (lldb) b multiplyByFour +# CHECK-NEXT: Breakpoint 2: where = minidebuginfo-set-and-hit-breakpoint.test.tmp.binary`multiplyByFour, address = 0x{{.*}} + +# CHECK: * thread #1, name = 'minidebuginfo-s', stop reason = breakpoint 1.1 +# CHECK: * thread #1, name = 'minidebuginfo-s', stop reason = breakpoint 2.1 + +# CHECK: (lldb) breakpoint list -v +# CHECK-NEXT: Current breakpoints: +# CHECK-NEXT: 1: name = 'multiplyByThree' +# CHECK-NEXT: 1.1: +# CHECK-NEXT: module = {{.*}}/minidebuginfo-set-and-hit-breakpoint.test.tmp.binary +# CHECK-NEXT: symbol = multiplyByThree +# CHECK-NEXT: address = 0x{{.*}} +# CHECK-NEXT: resolved = true +# CHECK-NEXT: hit count = 1 + +# CHECK: 2: name = 'multiplyByFour' +# CHECK-NEXT: 2.1: +# CHECK-NEXT: module = {{.*}}/minidebuginfo-set-and-hit-breakpoint.test.tmp.binary +# CHECK-NEXT: symbol = multiplyByFour +# CHECK-NEXT: address = 0x{{.*}} +# CHECK-NEXT: resolved = true +# CHECK-NEXT: hit count = 1 diff --git a/lldb/lit/lit.cfg.py b/lldb/lit/lit.cfg.py index 4cf0855df3642..208338ce95c18 100644 --- a/lldb/lit/lit.cfg.py +++ b/lldb/lit/lit.cfg.py @@ -11,6 +11,7 @@ from lit.llvm import llvm_config from lit.llvm.subst import FindTool from lit.llvm.subst import ToolSubst +from distutils.spawn import find_executable site.addsitedir(os.path.dirname(__file__)) from helper import toolchain @@ -98,3 +99,9 
@@ def calculate_arch_features(arch_string): if not config.lldb_disable_python: config.available_features.add('python') + +if config.lldb_enable_lzma: + config.available_features.add('lzma') + +if find_executable('xz') != None: + config.available_features.add('xz') diff --git a/lldb/lit/lit.site.cfg.py.in b/lldb/lit/lit.site.cfg.py.in index 2ff07f7dab9d8..c9b6e09cc0e75 100644 --- a/lldb/lit/lit.site.cfg.py.in +++ b/lldb/lit/lit.site.cfg.py.in @@ -15,6 +15,7 @@ config.lldb_lit_tools_dir = r"@LLDB_LIT_TOOLS_DIR@" config.target_triple = "@TARGET_TRIPLE@" config.python_executable = "@PYTHON_EXECUTABLE@" config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.lldb_enable_lzma = @LLDB_ENABLE_LZMA@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.lldb_bitness = 64 if @LLDB_IS_64_BITS@ else 32 config.lldb_disable_python = @LLDB_DISABLE_PYTHON@ diff --git a/lldb/source/Host/CMakeLists.txt b/lldb/source/Host/CMakeLists.txt index 4bb8d363b577e..aa409bf24c9b0 100644 --- a/lldb/source/Host/CMakeLists.txt +++ b/lldb/source/Host/CMakeLists.txt @@ -29,6 +29,7 @@ add_host_subdirectory(common common/HostProcess.cpp common/HostThread.cpp common/LockFileBase.cpp + common/LZMA.cpp common/MainLoop.cpp common/MonitoringProcessLauncher.cpp common/NativeProcessProtocol.cpp @@ -157,6 +158,9 @@ endif() if (NOT LLDB_DISABLE_LIBEDIT) list(APPEND EXTRA_LIBS ${libedit_LIBRARIES}) endif() +if (LLDB_ENABLE_LZMA) + list(APPEND EXTRA_LIBS ${LIBLZMA_LIBRARIES}) +endif() if (NOT LLDB_DISABLE_LIBEDIT) list(APPEND LLDB_LIBEDIT_LIBS ${libedit_LIBRARIES}) diff --git a/lldb/source/Host/common/LZMA.cpp b/lldb/source/Host/common/LZMA.cpp new file mode 100644 index 0000000000000..47a15708c278f --- /dev/null +++ b/lldb/source/Host/common/LZMA.cpp @@ -0,0 +1,146 @@ +//===-- LZMA.cpp ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Host/Config.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +#if LLDB_ENABLE_LZMA +#include +#endif // LLDB_ENABLE_LZMA + +namespace lldb_private { + +namespace lzma { + +#if !LLDB_ENABLE_LZMA +bool isAvailable() { return false; } +llvm::Expected +getUncompressedSize(llvm::ArrayRef InputBuffer) { + llvm_unreachable("lzma::getUncompressedSize is unavailable"); +} + +llvm::Error uncompress(llvm::ArrayRef InputBuffer, + llvm::SmallVectorImpl &Uncompressed) { + llvm_unreachable("lzma::uncompress is unavailable"); +} + +#else // LLDB_ENABLE_LZMA + +bool isAvailable() { return true; } + +static const char *convertLZMACodeToString(lzma_ret Code) { + switch (Code) { + case LZMA_STREAM_END: + return "lzma error: LZMA_STREAM_END"; + case LZMA_NO_CHECK: + return "lzma error: LZMA_NO_CHECK"; + case LZMA_UNSUPPORTED_CHECK: + return "lzma error: LZMA_UNSUPPORTED_CHECK"; + case LZMA_GET_CHECK: + return "lzma error: LZMA_GET_CHECK"; + case LZMA_MEM_ERROR: + return "lzma error: LZMA_MEM_ERROR"; + case LZMA_MEMLIMIT_ERROR: + return "lzma error: LZMA_MEMLIMIT_ERROR"; + case LZMA_FORMAT_ERROR: + return "lzma error: LZMA_FORMAT_ERROR"; + case LZMA_OPTIONS_ERROR: + return "lzma error: LZMA_OPTIONS_ERROR"; + case LZMA_DATA_ERROR: + return "lzma error: LZMA_DATA_ERROR"; + case LZMA_BUF_ERROR: + return "lzma error: LZMA_BUF_ERROR"; + case LZMA_PROG_ERROR: + return "lzma error: LZMA_PROG_ERROR"; + default: + llvm_unreachable("unknown or unexpected lzma status code"); + } +} + +llvm::Expected +getUncompressedSize(llvm::ArrayRef InputBuffer) { + lzma_stream_flags opts{}; + if (InputBuffer.size() < LZMA_STREAM_HEADER_SIZE) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "size of xz-compressed blob (%lu bytes) is smaller than the " + "LZMA_STREAM_HEADER_SIZE (%lu bytes)", + 
InputBuffer.size(), LZMA_STREAM_HEADER_SIZE); + } + + // Decode xz footer. + lzma_ret xzerr = lzma_stream_footer_decode( + &opts, InputBuffer.data() + InputBuffer.size() - LZMA_STREAM_HEADER_SIZE); + if (xzerr != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_stream_footer_decode()=%s", + convertLZMACodeToString(xzerr)); + } + if (InputBuffer.size() < (opts.backward_size + LZMA_STREAM_HEADER_SIZE)) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "xz-compressed buffer size (%lu bytes) too small (required at " + "least %lu bytes) ", + InputBuffer.size(), (opts.backward_size + LZMA_STREAM_HEADER_SIZE)); + } + + // Decode xz index. + lzma_index *xzindex; + uint64_t memlimit(UINT64_MAX); + size_t inpos = 0; + xzerr = + lzma_index_buffer_decode(&xzindex, &memlimit, nullptr, + InputBuffer.data() + InputBuffer.size() - + LZMA_STREAM_HEADER_SIZE - opts.backward_size, + &inpos, InputBuffer.size()); + if (xzerr != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_index_buffer_decode()=%s", + convertLZMACodeToString(xzerr)); + } + + // Get size of uncompressed file to construct an in-memory buffer of the + // same size on the calling end (if needed). + uint64_t uncompressedSize = lzma_index_uncompressed_size(xzindex); + + // Deallocate xz index as it is no longer needed. + lzma_index_end(xzindex, nullptr); + + return uncompressedSize; +} + +llvm::Error uncompress(llvm::ArrayRef InputBuffer, + llvm::SmallVectorImpl &Uncompressed) { + llvm::Expected uncompressedSize = getUncompressedSize(InputBuffer); + + if (auto err = uncompressedSize.takeError()) + return err; + + Uncompressed.resize(*uncompressedSize); + + // Decompress xz buffer to buffer. 
+ uint64_t memlimit = UINT64_MAX; + size_t inpos = 0; + size_t outpos = 0; + lzma_ret ret = lzma_stream_buffer_decode( + &memlimit, 0, nullptr, InputBuffer.data(), &inpos, InputBuffer.size(), + Uncompressed.data(), &outpos, Uncompressed.size()); + if (ret != LZMA_OK) { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "lzma_stream_buffer_decode()=%s", + convertLZMACodeToString(ret)); + } + + return llvm::Error::success(); +} + +#endif // LLDB_ENABLE_LZMA + +} // end of namespace lzma +} // namespace lldb_private diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index e507f0e4d745b..64e32e5aa41ea 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -18,6 +18,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Core/Section.h" #include "lldb/Host/FileSystem.h" +#include "lldb/Host/LZMA.h" #include "lldb/Symbol/DWARFCallFrameInfo.h" #include "lldb/Symbol/SymbolContext.h" #include "lldb/Target/SectionLoadList.h" @@ -1842,6 +1843,70 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { // unified section list. if (GetType() != eTypeDebugInfo) unified_section_list = *m_sections_up; + + // If there's a .gnu_debugdata section, we'll try to read the .symtab that's + // embedded in there and replace the one in the original object file (if any). + // If there's none in the original object file, we add it to it. 
+ if (auto gdd_obj_file = GetGnuDebugDataObjectFile()) { + if (auto gdd_objfile_section_list = gdd_obj_file->GetSectionList()) { + if (SectionSP symtab_section_sp = + gdd_objfile_section_list->FindSectionByType( + eSectionTypeELFSymbolTable, true)) { + SectionSP module_section_sp = unified_section_list.FindSectionByType( + eSectionTypeELFSymbolTable, true); + if (module_section_sp) + unified_section_list.ReplaceSection(module_section_sp->GetID(), + symtab_section_sp); + else + unified_section_list.AddSection(symtab_section_sp); + } + } + } +} + +std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { + if (m_gnu_debug_data_object_file != nullptr) + return m_gnu_debug_data_object_file; + + SectionSP section = + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata")); + if (!section) + return nullptr; + + if (!lldb_private::lzma::isAvailable()) { + GetModule()->ReportWarning( + "No LZMA support found for reading .gnu_debugdata section"); + return nullptr; + } + + // Uncompress the data + DataExtractor data; + section->GetSectionData(data); + llvm::SmallVector uncompressedData; + auto err = lldb_private::lzma::uncompress(data.GetData(), uncompressedData); + if (err) { + GetModule()->ReportWarning( + "An error occurred while decompression the section %s: %s", + section->GetName().AsCString(), llvm::toString(std::move(err)).c_str()); + return nullptr; + } + + // Construct ObjectFileELF object from decompressed buffer + DataBufferSP gdd_data_buf( + new DataBufferHeap(uncompressedData.data(), uncompressedData.size())); + auto fspec = GetFileSpec().CopyByAppendingPathComponent( + llvm::StringRef("gnu_debugdata")); + m_gnu_debug_data_object_file.reset(new ObjectFileELF( + GetModule(), gdd_data_buf, 0, &fspec, 0, gdd_data_buf->GetByteSize())); + + // This line is essential; otherwise a breakpoint can be set but not hit. 
+ m_gnu_debug_data_object_file->SetType(ObjectFile::eTypeDebugInfo); + + ArchSpec spec = m_gnu_debug_data_object_file->GetArchitecture(); + if (spec && m_gnu_debug_data_object_file->SetModulesArchitecture(spec)) + return m_gnu_debug_data_object_file; + + return nullptr; } // Find the arm/aarch64 mapping symbol character in the given symbol name. @@ -2649,19 +2714,29 @@ Symtab *ObjectFileELF::GetSymtab() { // while the reverse is not necessarily true. Section *symtab = section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); - if (!symtab) { - // The symtab section is non-allocable and can be stripped, so if it - // doesn't exist then use the dynsym section which should always be - // there. - symtab = - section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) - .get(); - } if (symtab) { m_symtab_up.reset(new Symtab(symtab->GetObjectFile())); symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab); } + // The symtab section is non-allocable and can be stripped, while the + // .dynsym section should always be there. To support the + // minidebuginfo case we parse .dynsym when there's a .gnu_debugdata + // section, no matter if .symtab was already parsed or not. This is because + // minidebuginfo normally removes the .symtab symbols which have their + // matching .dynsym counterparts. 
+ if (!symtab || + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) + .get(); + if (dynsym) { + if (!m_symtab_up) + m_symtab_up.reset(new Symtab(dynsym->GetObjectFile())); + symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym); + } + } + // DT_JMPREL // If present, this entry's d_ptr member holds the address of // relocation diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h index f58618fed7970..3b273896cb598 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -208,6 +208,10 @@ class ObjectFileELF : public lldb_private::ObjectFile { /// Collection of symbols from the dynamic table. DynamicSymbolColl m_dynamic_symbols; + /// Object file parsed from .gnu_debugdata section (\sa + /// GetGnuDebugDataObjectFile()) + std::shared_ptr m_gnu_debug_data_object_file; + /// List of file specifications corresponding to the modules (shared /// libraries) on which this object file depends. mutable std::unique_ptr m_filespec_up; @@ -383,6 +387,14 @@ class ObjectFileELF : public lldb_private::ObjectFile { lldb_private::UUID &uuid); bool AnySegmentHasPhysicalAddress(); + + /// Takes the .gnu_debugdata and returns the decompressed object file that is + /// stored within that section. + /// + /// \returns either the decompressed object file stored within the + /// .gnu_debugdata section or \c nullptr if an error occured or if there's no + /// section with that name. + std::shared_ptr GetGnuDebugDataObjectFile(); }; #endif // liblldb_ObjectFileELF_h_ From 368e0f3757e27e6baac22a0961f491e9c31761bf Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 7 Oct 2019 10:53:56 +0000 Subject: [PATCH 096/254] [clangd] If an undocumented definition exists, don't accept documentation from other forward decls. 
Summary: This fixes cases like: foo.h class Undocumented{} bar.h // break an include cycle. we should refactor this! class Undocumented; Where the comment doesn't describe the class. Note that a forward decl that is *visible to the definition* will still have its doc comment used, by SymbolCollector: Merge isn't involved here. Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68467 llvm-svn: 373892 --- clang-tools-extra/clangd/index/Merge.cpp | 5 ++++- clang-tools-extra/clangd/unittests/IndexTests.cpp | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/Merge.cpp b/clang-tools-extra/clangd/index/Merge.cpp index 829b7d02e4e14..76c5955893e47 100644 --- a/clang-tools-extra/clangd/index/Merge.cpp +++ b/clang-tools-extra/clangd/index/Merge.cpp @@ -186,7 +186,10 @@ Symbol mergeSymbol(const Symbol &L, const Symbol &R) { S.Signature = O.Signature; if (S.CompletionSnippetSuffix == "") S.CompletionSnippetSuffix = O.CompletionSnippetSuffix; - if (S.Documentation == "") + // Don't accept documentation from bare forward declarations, if there is a + // definition and it didn't provide one. S is often an undocumented class, + // and O is a non-canonical forward decl preceded by an irrelevant comment. 
+ if (S.Documentation == "" && !S.Definition) S.Documentation = O.Documentation; if (S.ReturnType == "") S.ReturnType = O.ReturnType; diff --git a/clang-tools-extra/clangd/unittests/IndexTests.cpp b/clang-tools-extra/clangd/unittests/IndexTests.cpp index b3a5489614860..d4017c226d331 100644 --- a/clang-tools-extra/clangd/unittests/IndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/IndexTests.cpp @@ -413,6 +413,16 @@ TEST(MergeIndexTest, Refs) { FileURI("unittest:///test2.cc")))))); } +TEST(MergeIndexTest, NonDocumentation) { + Symbol L, R; + L.ID = R.ID = SymbolID("x"); + L.Definition.FileURI = "file:/x.h"; + R.Documentation = "Forward declarations because x.h is too big to include"; + + Symbol M = mergeSymbol(L, R); + EXPECT_EQ(M.Documentation, ""); +} + MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } From 301decd93d79ab1e8764a46abac16db14225088b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 7 Oct 2019 10:57:41 +0000 Subject: [PATCH 097/254] [AMDGPU] Fix test checks The GFX10-DENORM-STRICT checks were only passing by accident. Fix them to make the test more robust in the face of scheduling or register allocation changes. 
llvm-svn: 373893 --- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll index 0ab2e6710c3b8..248cbe6ab5cc7 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -331,7 +331,8 @@ define amdgpu_kernel void @mad_sub_f16(half addrspace(1)* noalias nocapture %out ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] ; GFX10-FLUSH: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] -; GFX10-DENORM: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] +; GFX10-DENORM-STRICT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX10-DENORM-CONTRACT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] define amdgpu_kernel void @mad_sub_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 @@ -439,7 +440,8 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(half addrspace(1)* noalias nocap ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] ; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] ; GFX10-FLUSH: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] -; GFX10-DENORM: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] +; GFX10-DENORM-STRICT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX10-DENORM-CONTRACT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 From 579882ae4407377671303f41d861d94d39a36549 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 7 Oct 2019 11:14:53 +0000 Subject: 
[PATCH 098/254] [ASTImporter][NFC] Fix typo in user docs llvm-svn: 373894 --- clang/docs/LibASTImporter.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/LibASTImporter.rst b/clang/docs/LibASTImporter.rst index 6593536f2cc3a..9c02b6ae76e99 100644 --- a/clang/docs/LibASTImporter.rst +++ b/clang/docs/LibASTImporter.rst @@ -106,7 +106,7 @@ Next, we define a matcher to match ``MyClass`` in the "from" context: .. code-block:: cpp - auto Matcher = cxxRecordDecl(hasName("C")); + auto Matcher = cxxRecordDecl(hasName("MyClass")); auto *From = getFirstDecl(Matcher, FromUnit); Now we create the Importer and do the import: From 8f7fbed85e4b4d977bb8af7b7a5dc12e82749e56 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 7 Oct 2019 11:15:18 +0000 Subject: [PATCH 099/254] [ASTImporter][NFC] Update ASTImporter internals docs llvm-svn: 373895 --- clang/docs/InternalsManual.rst | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index 59f3cba699dbb..409432efcfb16 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -1519,11 +1519,11 @@ statements are true: - A and X are nodes from the same ASTContext. - B and Y are nodes from the same ASTContext. - A and B may or may not be from the same ASTContext. -- if A == X (pointer equivalency) then (there is a cycle during the traverse) +- if A == X and B == Y (pointer equivalency) then (there is a cycle during the + traverse) - A and B are structurally equivalent if and only if - - B and Y are part of the same redeclaration chain, - All dependent nodes on the path from to are structurally equivalent. @@ -1563,15 +1563,6 @@ the whole redeclaration chain of the function. The most recent version of the declarations - regardless if they are definitions or prototypes - in the order as they appear in the "from" context. -.. 
Structural eq requires proper redecl chains - -Another reason why we must maintain and import redeclaration chains properly is -that the :ref:`Structural Equivalency ` check would report false -positive in-equivalencies otherwise. We must not allow having two (or more) -independent redeclaration chains of structurally equivalent declarations. -Structural equivalency identifies the chains with the canonical declaration, -that becomes different for independent chains. - .. One definition If we have an existing definition in the "to" context, then we cannot import From 305a11d40911c7ae7cd37259feec3451b3a02ee1 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 7 Oct 2019 11:34:54 +0000 Subject: [PATCH 100/254] [ASTImporter][NFC] Enable disabled but passing test RedeclChainShouldBeCorrectAmongstNamespaces llvm-svn: 373896 --- clang/unittests/AST/ASTImporterTest.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 1514313231bf3..ba2feff5fca67 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -4785,11 +4785,8 @@ TEST_P(ASTImporterLookupTableTest, LookupSearchesInTheWholeRedeclChain) { EXPECT_EQ(*Res.begin(), A); } - -// FIXME This test is disabled currently, upcoming patches will make it -// possible to enable. TEST_P(ASTImporterOptionSpecificTestBase, - DISABLED_RedeclChainShouldBeCorrectAmongstNamespaces) { + RedeclChainShouldBeCorrectAmongstNamespaces) { Decl *FromTU = getTuDecl( R"( namespace NS { From 77c97002dc1ac66b429d8012df0536f0fd78a826 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 7 Oct 2019 11:37:25 +0000 Subject: [PATCH 101/254] [clangd] Catch an unchecked "Expected" in HeaderSourceSwitch. Summary: Also fixes a potential user-after-scope issue of "Path". 
Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68564 llvm-svn: 373897 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- clang-tools-extra/clangd/ClangdServer.cpp | 2 +- clang-tools-extra/clangd/HeaderSourceSwitch.cpp | 4 +++- .../clangd/unittests/HeaderSourceSwitchTests.cpp | 7 +++++++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 692bd24072a47..ea1517626596d 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -1045,7 +1045,7 @@ void ClangdLSPServer::onSwitchSourceHeader( if (!Path) return Reply(Path.takeError()); if (*Path) - Reply(URIForFile::canonicalize(**Path, Params.uri.file())); + return Reply(URIForFile::canonicalize(**Path, Params.uri.file())); return Reply(llvm::None); }); } diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index b59bb4136259c..4f1fe8f5b08be 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -460,7 +460,7 @@ void ClangdServer::switchSourceHeader( if (auto CorrespondingFile = getCorrespondingHeaderOrSource(Path, FSProvider.getFileSystem())) return CB(std::move(CorrespondingFile)); - auto Action = [Path, CB = std::move(CB), + auto Action = [Path = Path.str(), CB = std::move(CB), this](llvm::Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp index 535c8d6d8e1dc..06ad71d29b277 100644 --- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp +++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp @@ -86,7 +86,9 @@ llvm::Optional getCorrespondingHeaderOrSource(const Path &OriginalFile, if (auto TargetPath = URI::resolve(TargetURI, 
OriginalFile)) { if (*TargetPath != OriginalFile) // exclude the original file. ++Candidates[*TargetPath]; - }; + } else { + elog("Failed to resolve URI {0}: {1}", TargetURI, TargetPath.takeError()); + } }; // If we switch from a header, we are looking for the implementation // file, so we use the definition loc; otherwise we look for the header file, diff --git a/clang-tools-extra/clangd/unittests/HeaderSourceSwitchTests.cpp b/clang-tools-extra/clangd/unittests/HeaderSourceSwitchTests.cpp index 3b5fe86b96b6c..19838af5903a0 100644 --- a/clang-tools-extra/clangd/unittests/HeaderSourceSwitchTests.cpp +++ b/clang-tools-extra/clangd/unittests/HeaderSourceSwitchTests.cpp @@ -125,6 +125,7 @@ TEST(HeaderSourceSwitchTest, FromHeaderToSource) { Testing.HeaderCode = R"cpp( void B_Sym1(); void B_Sym2(); + void B_Sym3_NoDef(); )cpp"; Testing.Filename = "b.cpp"; Testing.Code = R"cpp( @@ -163,6 +164,12 @@ TEST(HeaderSourceSwitchTest, FromHeaderToSource) { void B_Sym1(); )cpp", testPath("a.cpp")}, + + {R"cpp( + // We don't have definition in the index, so stay in the header. + void B_Sym3_NoDef(); + )cpp", + None}, }; for (const auto &Case : TestCases) { TestTU TU = TestTU::withCode(Case.HeaderCode); From 0fedc26a0dc0066f3968b9fea6a4e1f746c8d5a4 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 11:46:26 +0000 Subject: [PATCH 102/254] Revert r373888 "[IA] Recognize hexadecimal escape sequences" It broke MC/AsmParser/directive_ascii.s on all bots: Assertion failed: (Index < Length && "Invalid index!"), function operator[], file ../../llvm/include/llvm/ADT/StringRef.h, line 243. 
llvm-svn: 373898 --- llvm/lib/MC/MCParser/AsmParser.cpp | 17 +---------------- llvm/test/MC/AsmParser/directive_ascii.s | 5 ----- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index b25959b102d6d..381bf96416166 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2914,26 +2914,11 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. + // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. ++i; if (i == e) return TokError("unexpected backslash at end of string"); - // Recognize hex sequences similarly to GNU 'as'. - if (Str[i] == 'x' || Str[i] == 'X') { - if (!isHexDigit(Str[i + 1])) - return TokError("invalid hexadecimal escape sequence"); - - // Consume hex characters. GNU 'as' reads all hexadecimal characters and - // then truncates to the lower 16 bits. Seems reasonable. - unsigned Value = 0; - while (isHexDigit(Str[i + 1])) - Value = Value * 16 + hexDigitValue(Str[++i]); - - Data += (unsigned char)(Value & 0xFF); - continue; - } - // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. diff --git a/llvm/test/MC/AsmParser/directive_ascii.s b/llvm/test/MC/AsmParser/directive_ascii.s index 604f9721bcca9..a7ba7bbd5da13 100644 --- a/llvm/test/MC/AsmParser/directive_ascii.s +++ b/llvm/test/MC/AsmParser/directive_ascii.s @@ -39,8 +39,3 @@ TEST5: # CHECK: .byte 0 TEST6: .string "B", "C" - -# CHECK: TEST7: -# CHECK: .ascii "dk" -TEST7: - .ascii "\x64\Xa6B" From a1f5c258d6928de562afaf21f3e4308530ec75cc Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 13:13:31 +0000 Subject: [PATCH 103/254] gn build: use better triple on windows The CMake build uses "x86_64-pc-windows-msvc". The "-msvc" suffix is important because e.g. 
clang/test/lit.cfg.py matches against the suffix "windows-msvc" to compute the presence of the "ms-sdk" and the absence of the "LP64" feature. Differential Revision: https://reviews.llvm.org/D68572 llvm-svn: 373899 --- llvm/utils/gn/secondary/llvm/triples.gni | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/triples.gni b/llvm/utils/gn/secondary/llvm/triples.gni index efe8be28513c6..558731295cee7 100644 --- a/llvm/utils/gn/secondary/llvm/triples.gni +++ b/llvm/utils/gn/secondary/llvm/triples.gni @@ -10,7 +10,7 @@ if (current_cpu == "x86") { } else if (current_os == "mac") { llvm_current_triple = "x86_64-apple-darwin" } else if (current_os == "win") { - llvm_current_triple = "x86_64-pc-windows" + llvm_current_triple = "x86_64-pc-windows-msvc" } } else if (current_cpu == "arm64") { if (current_os == "android") { From 1c3d19c82d93a00d11f0df03ff277411d134e061 Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Mon, 7 Oct 2019 13:20:00 +0000 Subject: [PATCH 104/254] [FPEnv] Add constrained intrinsics for lrint and lround Earlier in the year intrinsics for lrint, llrint, lround and llround were added to llvm. The constrained versions are now implemented here. 
Reviewed by: andrew.w.kaylor, craig.topper, cameron.mcinally Approved by: craig.topper Differential Revision: https://reviews.llvm.org/D64746 llvm-svn: 373900 --- llvm/docs/LangRef.rst | 172 ++++++++++++++++++ llvm/include/llvm/CodeGen/ISDOpcodes.h | 1 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 + llvm/include/llvm/CodeGen/TargetLowering.h | 4 + llvm/include/llvm/IR/IntrinsicInst.h | 4 + llvm/include/llvm/IR/Intrinsics.td | 14 ++ .../include/llvm/Target/TargetSelectionDAG.td | 20 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 88 ++++++--- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 + .../SelectionDAG/SelectionDAGBuilder.cpp | 16 ++ .../SelectionDAG/SelectionDAGDumper.cpp | 4 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 4 + llvm/lib/IR/IntrinsicInst.cpp | 4 + llvm/lib/IR/Verifier.cpp | 29 +++ llvm/test/CodeGen/X86/fp-intrinsics.ll | 84 +++++++++ llvm/test/Feature/fp-intrinsics.ll | 92 ++++++++++ 16 files changed, 516 insertions(+), 28 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d4c57b796811..8d84b04030715 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15940,6 +15940,102 @@ mode is determined by the runtime floating-point environment. The rounding mode argument is only intended as information to the compiler. +'``llvm.experimental.constrained.lrint``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.lrint( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.lrint``' intrinsic returns the first +operand rounded to the nearest integer. An inexact floating-point exception +will be raised if the operand is not an integer. An invalid exception is +raised if the result is too large to fit into a supported integer type, +and in this case the result is undefined. + +Arguments: +"""""""""" + +The first argument is a floating-point number. The return value is an +integer type. 
Not all types are supported on all targets. The supported +types are the same as the ``llvm.lrint`` intrinsic and the ``lrint`` +libm functions. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``lrint`` functions +would, and handles error conditions in the same way. + +The rounding mode is described, not determined, by the rounding mode +argument. The actual rounding mode is determined by the runtime floating-point +environment. The rounding mode argument is only intended as information +to the compiler. + +If the runtime floating-point environment is using the default rounding mode +then the results will be the same as the llvm.lrint intrinsic. + + +'``llvm.experimental.constrained.llrint``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.llrint( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.llrint``' intrinsic returns the first +operand rounded to the nearest integer. An inexact floating-point exception +will be raised if the operand is not an integer. An invalid exception is +raised if the result is too large to fit into a supported integer type, +and in this case the result is undefined. + +Arguments: +"""""""""" + +The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the ``llvm.llrint`` intrinsic and the ``llrint`` +libm functions. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``llrint`` functions +would, and handles error conditions in the same way. + +The rounding mode is described, not determined, by the rounding mode +argument. 
The actual rounding mode is determined by the runtime floating-point +environment. The rounding mode argument is only intended as information +to the compiler. + +If the runtime floating-point environment is using the default rounding mode +then the results will be the same as the llvm.llrint intrinsic. + + '``llvm.experimental.constrained.nearbyint``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -16162,6 +16258,82 @@ This function returns the same values as the libm ``round`` functions would and handles error conditions in the same way. +'``llvm.experimental.constrained.lround``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.lround( , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.lround``' intrinsic returns the first +operand rounded to the nearest integer with ties away from zero. It will +raise an inexact floating-point exception if the operand is not an integer. +An invalid exception is raised if the result is too large to fit into a +supported integer type, and in this case the result is undefined. + +Arguments: +"""""""""" + +The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the ``llvm.lround`` intrinsic and the ``lround`` +libm functions. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``lround`` functions +would and handles error conditions in the same way. 
+ + +'``llvm.experimental.constrained.llround``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.llround( , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.llround``' intrinsic returns the first +operand rounded to the nearest integer with ties away from zero. It will +raise an inexact floating-point exception if the operand is not an integer. +An invalid exception is raised if the result is too large to fit into a +supported integer type, and in this case the result is undefined. + +Arguments: +"""""""""" + +The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the ``llvm.llround`` intrinsic and the ``llround`` +libm functions. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``llround`` functions +would and handles error conditions in the same way. + + '``llvm.experimental.constrained.trunc``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 1059b6bd41b3a..d052cfb43591c 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -301,6 +301,7 @@ namespace ISD { STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT, /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or /// unsigned integer. 
These have the same semantics as fptosi and fptoui diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 2b00b8568705e..ceb8b72635a29 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -701,12 +701,16 @@ END_TWO_BYTE_PACK() case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: case ISD::STRICT_FMINNUM: case ISD::STRICT_FCEIL: case ISD::STRICT_FFLOOR: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: case ISD::STRICT_FP_TO_SINT: diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a5dfb8b86a879..4ab61edec25fa 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -953,12 +953,16 @@ class TargetLoweringBase { case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; diff --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h index f415336119ffa..9400f0a0801e8 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -273,12 +273,16 @@ namespace llvm { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 2d2e2a6f3d6f7..e764ad4e566eb 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -703,6 +703,14 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -721,6 +729,12 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def 
int_experimental_constrained_llround : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 8856cf003af00..441f3d7d118d1 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -506,12 +506,20 @@ def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lrint : SDNode<"ISD::STRICT_LRINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llrint : SDNode<"ISD::STRICT_LLRINT", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lround : SDNode<"ISD::STRICT_LROUND", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llround : SDNode<"ISD::STRICT_LLROUND", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fround : SDNode<"ISD::STRICT_FROUND", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", @@ -1339,6 +1347,12 @@ def any_flog2 : PatFrags<(ops node:$src), def any_frint : PatFrags<(ops node:$src), [(strict_frint node:$src), (frint node:$src)]>; +def any_lrint : PatFrags<(ops node:$src), + [(strict_lrint node:$src), + (lrint node:$src)]>; +def any_llrint : PatFrags<(ops node:$src), + [(strict_llrint node:$src), + (llrint node:$src)]>; def any_fnearbyint : PatFrags<(ops node:$src), [(strict_fnearbyint node:$src), (fnearbyint node:$src)]>; @@ -1348,6 +1362,12 @@ def any_fceil : PatFrags<(ops node:$src), def any_ffloor : PatFrags<(ops node:$src), [(strict_ffloor node:$src), (ffloor node:$src)]>; +def any_lround : 
PatFrags<(ops node:$src), + [(strict_lround node:$src), + (lround node:$src)]>; +def any_llround : PatFrags<(ops node:$src), + [(strict_llround node:$src), + (llround node:$src)]>; def any_fround : PatFrags<(ops node:$src), [(strict_fround node:$src), (fround node:$src)]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0efcaaa19cbde..f40565c5fd12c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1103,6 +1103,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. + Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -2141,6 +2151,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2895,30 +2908,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; } break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, 
RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); - break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); @@ -3712,10 +3701,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // the "strict" properties. For now, we just fall back to the non-strict // version if that is legal on the target. The actual mutation of the // operation will happen in SelectionDAGISel::DoInstructionSelection. - if (TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)) - == TargetLowering::Legal) - return true; + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } } // Replace the original node with the legalized result. 
@@ -3959,6 +3963,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0c55ff73c3b74..52a71b91d93f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7756,12 +7756,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case 
ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 31cecc01d9d3f..5380630eabf6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6104,12 +6104,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(I)); @@ -6935,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6953,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case 
Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 003dbb233b328..462b719735d28 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -333,9 +333,13 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index bc005a2cc27be..1c11ca3286eb6 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -709,10 +709,14 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::STRICT_FLOG, VT, Expand); setOperationAction(ISD::STRICT_FLOG10, VT, Expand); setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_LRINT, VT, Expand); + setOperationAction(ISD::STRICT_LLRINT, VT, Expand); setOperationAction(ISD::STRICT_FRINT, VT, Expand); setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); setOperationAction(ISD::STRICT_FCEIL, VT, Expand); setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + 
setOperationAction(ISD::STRICT_LROUND, VT, Expand); + setOperationAction(ISD::STRICT_LLROUND, VT, Expand); setOperationAction(ISD::STRICT_FROUND, VT, Expand); setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index d3c948d6de385..26ed46a9cd918 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -200,10 +200,14 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 6739ef26ed413..173d69471fce2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4308,12 +4308,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case 
Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(Call)); @@ -4766,6 +4770,31 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: { + Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + } + break; + + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: { + Assert((NumOperands == 2), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + break; + } + case Intrinsic::experimental_constrained_fma: Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic", &FPI); diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index a41bd9249c17f..2f5224eaf6c2f 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -342,6 +342,82 @@ entry: ret double %result } +; CHECK-LABEL: f23 +; COMMON: jmp lrint +define i32 @f23(double %x) #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; CHECK-LABEL: f24 +; COMMON: jmp lrintf +define i32 @f24(float %x) #0 { +entry: + %result = call i32 
@llvm.experimental.constrained.lrint.i32.f32(float %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; CHECK-LABEL: f25 +; COMMON: jmp llrint +define i64 @f25(double %x) #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; CHECK-LABEL: f26 +; COMMON: jmp llrintf +define i64 @f26(float %x) { +entry: + %result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; CHECK-LABEL: f27 +; COMMON: jmp lround +define i32 @f27(double %x) #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; CHECK-LABEL: f28 +; COMMON: jmp lroundf +define i32 @f28(float %x) #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; CHECK-LABEL: f29 +; COMMON: jmp llround +define i64 @f29(double %x) #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; CHECK-LABEL: f30 +; COMMON: jmp llroundf +define i64 @f30(float %x) #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, + metadata !"fpexcept.strict") #0 + ret i64 %result +} + attributes #0 = { strictfp } @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" @@ -368,3 +444,11 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) +declare i32 
@llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata) diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll index 355cbd90e2e1d..616897c3a0023 100644 --- a/llvm/test/Feature/fp-intrinsics.ll +++ b/llvm/test/Feature/fp-intrinsics.ll @@ -289,6 +289,90 @@ entry: ret double %result } +; Verify that lrint(42.1) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f22 +; CHECK: call i32 @llvm.experimental.constrained.lrint +define i32 @f22() #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lrint.i32.f64(double 42.1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; Verify that lrintf(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f23 +; CHECK: call i32 @llvm.experimental.constrained.lrint +define i32 @f23() #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lrint.i32.f32(float 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; Verify that llrint(42.1) isn't simplified when the rounding mode is unknown. 
+; CHECK-LABEL: f24 +; CHECK: call i64 @llvm.experimental.constrained.llrint +define i64 @f24() #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llrint.i64.f64(double 42.1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; Verify that llrint(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f25 +; CHECK: call i64 @llvm.experimental.constrained.llrint +define i64 @f25() #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; Verify that lround(42.1) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f26 +; CHECK: call i32 @llvm.experimental.constrained.lround +define i32 @f26() #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lround.i32.f64(double 42.1, + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; Verify that lround(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f27 +; CHECK: call i32 @llvm.experimental.constrained.lround +define i32 @f27() #0 { +entry: + %result = call i32 @llvm.experimental.constrained.lround.i32.f32(float 42.0, + metadata !"fpexcept.strict") #0 + ret i32 %result +} + +; Verify that llround(42.1) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: f28 +; CHECK: call i64 @llvm.experimental.constrained.llround +define i64 @f28() #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llround.i64.f64(double 42.1, + metadata !"fpexcept.strict") #0 + ret i64 %result +} + +; Verify that llround(42.0) isn't simplified when the rounding mode is unknown. 
+; CHECK-LABEL: f29 +; CHECK: call i64 @llvm.experimental.constrained.llround +define i64 @f29() #0 { +entry: + %result = call i64 @llvm.experimental.constrained.llround.i64.f32(float 42.0, + metadata !"fpexcept.strict") #0 + ret i64 %result +} + attributes #0 = { strictfp } @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" @@ -313,3 +397,11 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata) From b5fbdf1f5e0efb131512097babbd35a9081b8023 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Mon, 7 Oct 2019 13:23:12 +0000 Subject: [PATCH 105/254] Test commit Fix comment. llvm-svn: 373901 --- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index ce594e1fb4fa5..80ab1ea9f635f 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -120,7 +120,7 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { /// power of 2. 
bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits - /// ending at the most significant bit + /// ending at the most significant bit. bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits /// starting at bit zero. From a6fc72fba9dc3cc5d02236190df9d661563ddfd7 Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Mon, 7 Oct 2019 13:39:56 +0000 Subject: [PATCH 106/254] Fix sphinx warnings. Differential Revision: https://reviews.llvm.org/D64746 llvm-svn: 373902 --- llvm/docs/LangRef.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8d84b04030715..a5710e2cbbad8 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15941,7 +15941,7 @@ mode argument is only intended as information to the compiler. '``llvm.experimental.constrained.lrint``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -15989,7 +15989,7 @@ then the results will be the same as the llvm.lrint intrinsic. '``llvm.experimental.constrained.llrint``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" From b63db94fa54789d8241b16e05f2c9d9959afe297 Mon Sep 17 00:00:00 2001 From: whitequark Date: Mon, 7 Oct 2019 13:57:13 +0000 Subject: [PATCH 107/254] [LLVM-C] Add bindings to create macro debug info Summary: The C API doesn't have the bindings to create macro debug information. 
Reviewers: whitequark, CodaFi, deadalnix Reviewed By: whitequark Subscribers: aprantl, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D58334 llvm-svn: 373903 --- llvm/include/llvm-c/DebugInfo.h | 45 +++++++++++++ llvm/lib/IR/DebugInfo.cpp | 20 ++++++ llvm/test/Bindings/llvm-c/debug_info.ll | 90 ++++++++++++++----------- llvm/tools/llvm-c-test/debuginfo.c | 21 ++++++ 4 files changed, 137 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index c96faa2fbba4e..e9e0947620ad4 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -169,6 +169,19 @@ typedef unsigned LLVMMetadataKind; */ typedef unsigned LLVMDWARFTypeEncoding; +/** + * Describes the kind of macro declaration used for LLVMDIBuilderCreateMacro. + * @see llvm::dwarf::MacinfoRecordType + * @note Values are from DW_MACINFO_* constants in the DWARF specification. + */ +typedef enum { + LLVMDWARFMacinfoRecordTypeDefine = 0x01, + LLVMDWARFMacinfoRecordTypeMacro = 0x02, + LLVMDWARFMacinfoRecordTypeStartFile = 0x03, + LLVMDWARFMacinfoRecordTypeEndFile = 0x04, + LLVMDWARFMacinfoRecordTypeVendorExt = 0xff +} LLVMDWARFMacinfoRecordType; + /** * The current debug metadata version number. */ @@ -521,6 +534,38 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder, unsigned NumParameterTypes, LLVMDIFlags Flags); +/** + * Create debugging information entry for a macro. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro is defined. + * @param MacroType DW_MACINFO_define or DW_MACINFO_undef. + * @param Name Macro name. + * @param NameLen Macro name length. + * @param Value Macro value. + * @param ValueLen Macro value length. 
+ */ +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen); + +/** + * Create debugging information temporary entry for a macro file. + * List of macro node direct children will be calculated by DIBuilder, + * using the \p ParentMacroFile relationship. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro file is included. + * @param File File descriptor containing the name of the macro file. + */ +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File); + /** * Create debugging information entry for an enumerator. * @param Builder The DIBuilder. diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index ce47ef2074343..8f5101a4ae72f 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -929,6 +929,26 @@ const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) { return ""; } +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen) { + return wrap( + unwrap(Builder)->createMacro(unwrapDI(ParentMacroFile), Line, + static_cast(RecordType), + {Name, NameLen}, {Value, ValueLen})); +} + +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File) { + return wrap(unwrap(Builder)->createTempMacroFile( + unwrapDI(ParentMacroFile), Line, unwrapDI(File))); +} + LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder, const char *Name, size_t NameLen, int64_t Value, diff --git 
a/llvm/test/Bindings/llvm-c/debug_info.ll b/llvm/test/Bindings/llvm-c/debug_info.ll index 6cddd1ac29a0f..af682fdf19420 100644 --- a/llvm/test/Bindings/llvm-c/debug_info.ll +++ b/llvm/test/Bindings/llvm-c/debug_info.ll @@ -3,13 +3,13 @@ ; CHECK: ; ModuleID = 'debuginfo.c' ; CHECK-NEXT: source_filename = "debuginfo.c" -; CHECK: define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !20 { +; CHECK: define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !31 { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !27, metadata !DIExpression()), !dbg !32 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !28, metadata !DIExpression()), !dbg !32 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !29, metadata !DIExpression()), !dbg !32 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !38, metadata !DIExpression()), !dbg !43 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !39, metadata !DIExpression()), !dbg !43 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i64 0, metadata !40, metadata !DIExpression()), !dbg !43 ; CHECK: vars: ; No predecessors! 
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata !30, metadata !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)), !dbg !33 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata !41, metadata !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)), !dbg !44 ; CHECK-NEXT: } ; CHECK: ; Function Attrs: nounwind readnone speculatable @@ -21,39 +21,51 @@ ; CHECK: attributes #0 = { nounwind readnone speculatable willreturn } ; CHECK: !llvm.dbg.cu = !{!0} -; CHECK-NEXT: !FooType = !{!16} +; CHECK-NEXT: !FooType = !{!28} +; CHECK-NEXT: !EnumTest = !{!3} -; CHECK: !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "llvm-c-test", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, imports: !12, splitDebugInlining: false) +; CHECK: !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "llvm-c-test", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !11, imports: !19, macros: !23, splitDebugInlining: false) ; CHECK-NEXT: !1 = !DIFile(filename: "debuginfo.c", directory: ".") -; CHECK-NEXT: !2 = !{} -; CHECK-NEXT: !3 = !{!4, !8} -; CHECK-NEXT: !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)) -; CHECK-NEXT: !5 = distinct !DIGlobalVariable(name: "globalClass", scope: !6, file: !1, line: 1, type: !7, isLocal: true, isDefinition: true) -; CHECK-NEXT: !6 = !DIModule(scope: null, name: "llvm-c-test", includePath: "/test/include/llvm-c-test.h") -; CHECK-NEXT: !7 = !DICompositeType(tag: DW_TAG_structure_type, name: "TestClass", scope: !1, file: !1, line: 42, size: 64, flags: DIFlagObjcClassComplete, elements: !2) -; CHECK-NEXT: !8 = !DIGlobalVariableExpression(var: !9, expr: !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)) -; CHECK-NEXT: !9 = distinct !DIGlobalVariable(name: "global", scope: !6, file: !1, line: 1, type: !10, isLocal: true, isDefinition: true) -; CHECK-NEXT: !10 = !DIDerivedType(tag: 
DW_TAG_typedef, name: "int64_t", scope: !1, file: !1, line: 42, baseType: !11) -; CHECK-NEXT: !11 = !DIBasicType(name: "Int64", size: 64) -; CHECK-NEXT: !12 = !{!13, !15} -; CHECK-NEXT: !13 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !6, entity: !14, file: !1, line: 42) -; CHECK-NEXT: !14 = !DIModule(scope: null, name: "llvm-c-test-import", includePath: "/test/include/llvm-c-test-import.h") -; CHECK-NEXT: !15 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !6, entity: !13, file: !1, line: 42) -; CHECK-NEXT: !16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 192, dwarfAddressSpace: 0) -; CHECK-NEXT: !17 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyStruct", scope: !18, file: !1, size: 192, elements: !19, runtimeLang: DW_LANG_C89, identifier: "MyStruct") -; CHECK-NEXT: !18 = !DINamespace(name: "NameSpace", scope: !6) -; CHECK-NEXT: !19 = !{!11, !11, !11} -; CHECK-NEXT: !20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 42, type: !21, scopeLine: 42, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !26) -; CHECK-NEXT: !21 = !DISubroutineType(types: !22) -; CHECK-NEXT: !22 = !{!11, !11, !23} -; CHECK-NEXT: !23 = !DICompositeType(tag: DW_TAG_array_type, baseType: !11, size: 640, flags: DIFlagVector, elements: !24) -; CHECK-NEXT: !24 = !{!25} -; CHECK-NEXT: !25 = !DISubrange(count: 10) -; CHECK-NEXT: !26 = !{!27, !28, !29, !30} -; CHECK-NEXT: !27 = !DILocalVariable(name: "a", arg: 1, scope: !20, file: !1, line: 42, type: !11) -; CHECK-NEXT: !28 = !DILocalVariable(name: "b", arg: 2, scope: !20, file: !1, line: 42, type: !11) -; CHECK-NEXT: !29 = !DILocalVariable(name: "c", arg: 3, scope: !20, file: !1, line: 42, type: !23) -; CHECK-NEXT: !30 = !DILocalVariable(name: "d", scope: !31, file: !1, line: 43, type: !11) -; CHECK-NEXT: !31 = distinct !DILexicalBlock(scope: !20, file: !1, line: 42) -; CHECK-NEXT: !32 = !DILocation(line: 42, scope: !20) -; CHECK-NEXT: !33 
= !DILocation(line: 43, scope: !20) +; CHECK-NEXT: !2 = !{!3} +; CHECK-NEXT: !3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "EnumTest", scope: !4, file: !1, baseType: !6, size: 64, elements: !7) +; CHECK-NEXT: !4 = !DINamespace(name: "NameSpace", scope: !5) +; CHECK-NEXT: !5 = !DIModule(scope: null, name: "llvm-c-test", includePath: "/test/include/llvm-c-test.h") +; CHECK-NEXT: !6 = !DIBasicType(name: "Int64", size: 64) +; CHECK-NEXT: !7 = !{!8, !9, !10} +; CHECK-NEXT: !8 = !DIEnumerator(name: "Test_A", value: 0, isUnsigned: true) +; CHECK-NEXT: !9 = !DIEnumerator(name: "Test_B", value: 1, isUnsigned: true) +; CHECK-NEXT: !10 = !DIEnumerator(name: "Test_B", value: 2, isUnsigned: true) +; CHECK-NEXT: !11 = !{!12, !16} +; CHECK-NEXT: !12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)) +; CHECK-NEXT: !13 = distinct !DIGlobalVariable(name: "globalClass", scope: !5, file: !1, line: 1, type: !14, isLocal: true, isDefinition: true) +; CHECK-NEXT: !14 = !DICompositeType(tag: DW_TAG_structure_type, name: "TestClass", scope: !1, file: !1, line: 42, size: 64, flags: DIFlagObjcClassComplete, elements: !15) +; CHECK-NEXT: !15 = !{} +; CHECK-NEXT: !16 = !DIGlobalVariableExpression(var: !17, expr: !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)) +; CHECK-NEXT: !17 = distinct !DIGlobalVariable(name: "global", scope: !5, file: !1, line: 1, type: !18, isLocal: true, isDefinition: true) +; CHECK-NEXT: !18 = !DIDerivedType(tag: DW_TAG_typedef, name: "int64_t", scope: !1, file: !1, line: 42, baseType: !6) +; CHECK-NEXT: !19 = !{!20, !22} +; CHECK-NEXT: !20 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !5, entity: !21, file: !1, line: 42) +; CHECK-NEXT: !21 = !DIModule(scope: null, name: "llvm-c-test-import", includePath: "/test/include/llvm-c-test-import.h") +; CHECK-NEXT: !22 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !5, entity: !20, file: !1, line: 42) +; CHECK-NEXT: !23 = !{!24} +; CHECK-NEXT: !24 
= !DIMacroFile(file: !1, nodes: !25) +; CHECK-NEXT: !25 = !{!26, !27} +; CHECK-NEXT: !26 = !DIMacro(type: DW_MACINFO_define, name: "SIMPLE_DEFINE") +; CHECK-NEXT: !27 = !DIMacro(type: DW_MACINFO_define, name: "VALUE_DEFINE", value: "1") +; CHECK-NEXT: !28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !29, size: 192, dwarfAddressSpace: 0) +; CHECK-NEXT: !29 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyStruct", scope: !4, file: !1, size: 192, elements: !30, runtimeLang: DW_LANG_C89, identifier: "MyStruct") +; CHECK-NEXT: !30 = !{!6, !6, !6} +; CHECK-NEXT: !31 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 42, type: !32, scopeLine: 42, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !37) +; CHECK-NEXT: !32 = !DISubroutineType(types: !33) +; CHECK-NEXT: !33 = !{!6, !6, !34} +; CHECK-NEXT: !34 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !35) +; CHECK-NEXT: !35 = !{!36} +; CHECK-NEXT: !36 = !DISubrange(count: 10) +; CHECK-NEXT: !37 = !{!38, !39, !40, !41} +; CHECK-NEXT: !38 = !DILocalVariable(name: "a", arg: 1, scope: !31, file: !1, line: 42, type: !6) +; CHECK-NEXT: !39 = !DILocalVariable(name: "b", arg: 2, scope: !31, file: !1, line: 42, type: !6) +; CHECK-NEXT: !40 = !DILocalVariable(name: "c", arg: 3, scope: !31, file: !1, line: 42, type: !34) +; CHECK-NEXT: !41 = !DILocalVariable(name: "d", scope: !42, file: !1, line: 43, type: !6) +; CHECK-NEXT: !42 = distinct !DILexicalBlock(scope: !31, file: !1, line: 42) +; CHECK-NEXT: !43 = !DILocation(line: 42, scope: !31) +; CHECK-NEXT: !44 = !DILocation(line: 43, scope: !31) diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c index ff96037d4af10..e498de6a745ad 100644 --- a/llvm/tools/llvm-c-test/debuginfo.c +++ b/llvm/tools/llvm-c-test/debuginfo.c @@ -170,6 +170,27 @@ int llvm_test_dibuilder(void) { LLVMDIBuilderInsertDbgValueAtEnd(DIB, FooVal1, FooVar1, 
FooVarValueExpr, FooVarsLocation, FooVarBlock); + LLVMMetadataRef MacroFile = + LLVMDIBuilderCreateTempMacroFile(DIB, NULL, 0, File); + LLVMDIBuilderCreateMacro(DIB, MacroFile, 0, LLVMDWARFMacinfoRecordTypeDefine, + "SIMPLE_DEFINE", 13, NULL, 0); + LLVMDIBuilderCreateMacro(DIB, MacroFile, 0, LLVMDWARFMacinfoRecordTypeDefine, + "VALUE_DEFINE", 12, "1", 1); + + LLVMMetadataRef EnumeratorTestA = + LLVMDIBuilderCreateEnumerator(DIB, "Test_A", strlen("Test_A"), 0, true); + LLVMMetadataRef EnumeratorTestB = + LLVMDIBuilderCreateEnumerator(DIB, "Test_B", strlen("Test_B"), 1, true); + LLVMMetadataRef EnumeratorTestC = + LLVMDIBuilderCreateEnumerator(DIB, "Test_B", strlen("Test_C"), 2, true); + LLVMMetadataRef EnumeratorsTest[] = {EnumeratorTestA, EnumeratorTestB, + EnumeratorTestC}; + LLVMMetadataRef EnumTest = LLVMDIBuilderCreateEnumerationType( + DIB, NameSpace, "EnumTest", strlen("EnumTest"), File, 0, 64, 0, + EnumeratorsTest, 3, Int64Ty); + LLVMAddNamedMetadataOperand( + M, "EnumTest", LLVMMetadataAsValue(LLVMGetModuleContext(M), EnumTest)); + LLVMDIBuilderFinalize(DIB); char *MStr = LLVMPrintModuleToString(M); From 3459a4c770bacf10bced00758bb2b70c4c504207 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Oct 2019 13:58:05 +0000 Subject: [PATCH 108/254] AST - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but in these cases we should be able to use castAs<> directly and if not assert will fire for us. 
llvm-svn: 373904 --- clang/lib/AST/ExprCXX.cpp | 9 +++------ clang/lib/AST/Interp/Program.cpp | 2 +- clang/lib/AST/Mangle.cpp | 2 +- clang/lib/AST/StmtPrinter.cpp | 8 ++++---- clang/lib/AST/TemplateBase.cpp | 2 +- clang/lib/AST/TypePrinter.cpp | 2 +- 6 files changed, 11 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 1d5fd80d0d470..cb6b8703f5e16 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -251,7 +251,7 @@ QualType CXXDeleteExpr::getDestroyedType() const { if (ArgType->isDependentType() && !ArgType->isPointerType()) return QualType(); - return ArgType->getAs()->getPointeeType(); + return ArgType->castAs()->getPointeeType(); } // CXXPseudoDestructorExpr @@ -1512,11 +1512,8 @@ CXXRecordDecl *UnresolvedMemberExpr::getNamingClass() { // Otherwise the naming class must have been the base class. else { QualType BaseType = getBaseType().getNonReferenceType(); - if (isArrow()) { - const auto *PT = BaseType->getAs(); - assert(PT && "base of arrow member access is not pointer"); - BaseType = PT->getPointeeType(); - } + if (isArrow()) + BaseType = BaseType->castAs()->getPointeeType(); Record = BaseType->getAsCXXRecordDecl(); assert(Record && "base of member expression does not name record"); diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index d947b4746f8c4..fcbab0ea8172c 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -123,7 +123,7 @@ llvm::Optional Program::getOrCreateDummy(const ParmVarDecl *PD) { auto &ASTCtx = Ctx.getASTContext(); // Create a pointer to an incomplete array of the specified elements. - QualType ElemTy = PD->getType()->getAs()->getPointeeType(); + QualType ElemTy = PD->getType()->castAs()->getPointeeType(); QualType Ty = ASTCtx.getIncompleteArrayType(ElemTy, ArrayType::Normal, 0); // Dedup blocks since they are immutable and pointers cannot be compared. 
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index b158fe85a475c..32d466cb57180 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -386,7 +386,7 @@ class ASTNameGenerator::Implementation { auto hasDefaultCXXMethodCC = [](ASTContext &C, const CXXMethodDecl *MD) { auto DefaultCC = C.getDefaultCallingConvention(/*IsVariadic=*/false, /*IsCXXMethod=*/true); - auto CC = MD->getType()->getAs()->getCallConv(); + auto CC = MD->getType()->castAs()->getCallConv(); return CC == DefaultCC; }; diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 46802d765e1f0..e86f9c7063eaa 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1102,7 +1102,7 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) { OS << Node->getValue().toString(10, isSigned); // Emit suffixes. Integer literals are always a builtin integer type. - switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for integer literal!"); case BuiltinType::Char_S: case BuiltinType::Char_U: OS << "i8"; break; @@ -1123,7 +1123,7 @@ void StmtPrinter::VisitFixedPointLiteral(FixedPointLiteral *Node) { return; OS << Node->getValueAsString(/*Radix=*/10); - switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for fixed point literal!"); case BuiltinType::ShortFract: OS << "hr"; break; case BuiltinType::ShortAccum: OS << "hk"; break; @@ -1152,7 +1152,7 @@ static void PrintFloatingLiteral(raw_ostream &OS, FloatingLiteral *Node, return; // Emit suffixes. Float literals are always a builtin float type. - switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for float literal!"); case BuiltinType::Half: break; // FIXME: suffix? case BuiltinType::Double: break; // no suffix. 
@@ -1952,7 +1952,7 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) { if (Node->isMutable()) OS << " mutable"; - auto *Proto = Method->getType()->getAs(); + auto *Proto = Method->getType()->castAs(); Proto->printExceptionSpecification(OS, Policy); // FIXME: Attributes diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp index cb4cbd2f76a12..db16c2a06b64f 100644 --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -370,7 +370,7 @@ TemplateArgument TemplateArgument::getPackExpansionPattern() const { switch (getKind()) { case Type: - return getAsType()->getAs()->getPattern(); + return getAsType()->castAs()->getPattern(); case Expression: return cast(getAsExpr())->getPattern(); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 9c6a3dfeb84d4..d7b7103faaa94 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1537,7 +1537,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, QualType t = T->getEquivalentType(); while (!t->isFunctionType()) t = t->getPointeeType(); - OS << (t->getAs()->getCallConv() == CC_AAPCS ? + OS << (t->castAs()->getCallConv() == CC_AAPCS ? "\"aapcs\"" : "\"aapcs-vfp\""); OS << ')'; break; From 8dc1700979bc630e6bd8b0912cfc034814e67c7f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Oct 2019 13:58:15 +0000 Subject: [PATCH 109/254] RewriteModernObjC - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but in these cases we should be able to use castAs<> directly and if not assert will fire for us. 
llvm-svn: 373905 --- clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 0c2ab7a45312c..985bb07af65df 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -2752,7 +2752,7 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) { // Create a call to objc_getClass("NSArray"). It will be th 1st argument. ObjCInterfaceDecl *Class = - expType->getPointeeType()->getAs()->getInterface(); + expType->getPointeeType()->castAs()->getInterface(); IdentifierInfo *clsName = Class->getIdentifier(); ClsExprs.push_back(getStringLiteral(clsName->getName())); @@ -2806,7 +2806,7 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) { // Don't forget the parens to enforce the proper binding. ParenExpr *PE = new (Context) ParenExpr(StartLoc, EndLoc, cast); - const FunctionType *FT = msgSendType->getAs(); + const FunctionType *FT = msgSendType->castAs(); CallExpr *CE = CallExpr::Create(*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, EndLoc); ReplaceStmt(Exp, CE); @@ -2894,7 +2894,7 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral // Create a call to objc_getClass("NSArray"). It will be th 1st argument. ObjCInterfaceDecl *Class = - expType->getPointeeType()->getAs()->getInterface(); + expType->getPointeeType()->castAs()->getInterface(); IdentifierInfo *clsName = Class->getIdentifier(); ClsExprs.push_back(getStringLiteral(clsName->getName())); @@ -2957,7 +2957,7 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral // Don't forget the parens to enforce the proper binding. 
ParenExpr *PE = new (Context) ParenExpr(StartLoc, EndLoc, cast); - const FunctionType *FT = msgSendType->getAs(); + const FunctionType *FT = msgSendType->castAs(); CallExpr *CE = CallExpr::Create(*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, EndLoc); ReplaceStmt(Exp, CE); @@ -3309,7 +3309,7 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, case ObjCMessageExpr::Class: { SmallVector ClsExprs; ObjCInterfaceDecl *Class - = Exp->getClassReceiver()->getAs()->getInterface(); + = Exp->getClassReceiver()->castAs()->getInterface(); IdentifierInfo *clsName = Class->getIdentifier(); ClsExprs.push_back(getStringLiteral(clsName->getName())); CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl, ClsExprs, @@ -3530,7 +3530,7 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, // Don't forget the parens to enforce the proper binding. ParenExpr *PE = new (Context) ParenExpr(StartLoc, EndLoc, cast); - const FunctionType *FT = msgSendType->getAs(); + const FunctionType *FT = msgSendType->castAs(); CallExpr *CE = CallExpr::Create(*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, EndLoc); Stmt *ReplacingStmt = CE; @@ -3660,7 +3660,7 @@ bool RewriteModernObjC::RewriteObjCFieldDeclType(QualType &Type, } } else if (Type->isEnumeralType()) { - EnumDecl *ED = Type->getAs()->getDecl(); + EnumDecl *ED = Type->castAs()->getDecl(); if (ED->isCompleteDefinition()) { Result += "\n\tenum "; Result += ED->getName(); From 19ede2f53b78472c3bc3536f00609d22253a7d52 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Mon, 7 Oct 2019 14:01:22 +0000 Subject: [PATCH 110/254] [Mips] Fix evaluating J-format branch targets J/JAL/JALX/JALS are absolute branches, but stay within the current 256 MB-aligned region, so we must include the high bits of the instruction address when calculating the branch target. Patch by James Clarke. 
Differential Revision: https://reviews.llvm.org/D68548 llvm-svn: 373906 --- .../Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 11 +++++++---- llvm/test/MC/Mips/micromips-jump-pc-region.s | 17 +++++++++++++++++ llvm/test/MC/Mips/mips-jump-pc-region.s | 17 +++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 llvm/test/MC/Mips/micromips-jump-pc-region.s create mode 100644 llvm/test/MC/Mips/mips-jump-pc-region.s diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index ddeec03ba784c..79c47d1b65084 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -143,12 +143,15 @@ class MipsMCInstrAnalysis : public MCInstrAnalysis { return false; switch (Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType) { case MCOI::OPERAND_UNKNOWN: - case MCOI::OPERAND_IMMEDIATE: - // jal, bal ... - Target = Inst.getOperand(NumOps - 1).getImm(); + case MCOI::OPERAND_IMMEDIATE: { + // j, jal, jalx, jals + // Absolute branch within the current 256 MB-aligned region + uint64_t Region = Addr & ~uint64_t(0xfffffff); + Target = Region + Inst.getOperand(NumOps - 1).getImm(); return true; + } case MCOI::OPERAND_PCREL: - // b, j, beq ... + // b, beq ... 
Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; default: diff --git a/llvm/test/MC/Mips/micromips-jump-pc-region.s b/llvm/test/MC/Mips/micromips-jump-pc-region.s new file mode 100644 index 0000000000000..5f598fc016ffe --- /dev/null +++ b/llvm/test/MC/Mips/micromips-jump-pc-region.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple=mips -mcpu=mips32 -mattr=+micromips -filetype=obj < %s \ +# RUN: | llvm-objdump -d - | FileCheck %s + +.set noreorder + +# Force us into the second 256 MB region with a non-zero instruction index +.org 256*1024*1024 + 12 +# CHECK-LABEL: 1000000c foo: +# CHECK-NEXT: 1000000c: d4 00 00 06 j 12 +# CHECK-NEXT: 10000010: f4 00 00 08 jal 16 +# CHECK-NEXT: 10000014: f0 00 00 05 jalx 20 +# CHECK-NEXT: 10000018: 74 00 00 0c jals 24 +foo: + j 12 + jal 16 + jalx 20 + jals 24 diff --git a/llvm/test/MC/Mips/mips-jump-pc-region.s b/llvm/test/MC/Mips/mips-jump-pc-region.s new file mode 100644 index 0000000000000..2d6bbce3a492b --- /dev/null +++ b/llvm/test/MC/Mips/mips-jump-pc-region.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple=mips -mcpu=mips32 -filetype=obj < %s \ +# RUN: | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -triple=mips64 -mcpu=mips64 -filetype=obj < %s \ +# RUN: | llvm-objdump -d - | FileCheck %s + +.set noreorder + +# Force us into the second 256 MB region with a non-zero instruction index +.org 256*1024*1024 + 12 +# CHECK-LABEL: 1000000c foo: +# CHECK-NEXT: 1000000c: 08 00 00 03 j 12 +# CHECK-NEXT: 10000010: 0c 00 00 04 jal 16 +# CHECK-NEXT: 10000014: 74 00 00 05 jalx 20 +foo: + j 12 + jal 16 + jalx 20 From 55ac7458280dddf253a235b2180d8053d5b05d0c Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Mon, 7 Oct 2019 14:01:37 +0000 Subject: [PATCH 111/254] [Mips] Always save RA when disabling frame pointer elimination This ensures that frame-based unwinding will continue to work when calling a noreturn function; there is not much use having the caller's frame pointer saved if you don't also have the caller's program counter. 
Patch by James Clarke. Differential Revision: https://reviews.llvm.org/D68542 llvm-svn: 373907 --- llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 7 +- llvm/test/CodeGen/Mips/cconv/vector.ll | 336 +++++++++++------- .../CodeGen/Mips/dynamic-stack-realignment.ll | 20 +- llvm/test/CodeGen/Mips/frame-address.ll | 25 +- .../CodeGen/Mips/no-frame-pointer-elim.ll | 37 ++ llvm/test/CodeGen/Mips/tnaked.ll | 2 +- llvm/test/CodeGen/Mips/v2i16tof32.ll | 9 +- 7 files changed, 276 insertions(+), 160 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/no-frame-pointer-elim.ll diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 55efe2cdc83a9..166ddea0431f3 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -865,12 +865,15 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); + unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA; unsigned FP = ABI.GetFramePtr(); unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; - // Mark $fp as used if function has dedicated frame pointer. - if (hasFP(MF)) + // Mark $ra and $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) { + setAliasRegs(MF, SavedRegs, RA); setAliasRegs(MF, SavedRegs, FP); + } // Mark $s7 as used if function has dedicated base pointer. 
if (hasBP(MF)) setAliasRegs(MF, SavedRegs, BP); diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index 7881bf861c29d..4a1f9cb6a6a9d 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -50,23 +50,25 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; ; MIPS32R5EB-LABEL: i8_2: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: addiu $sp, $sp, -64 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64 +; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $5, 36($sp) -; MIPS32R5EB-NEXT: sw $4, 40($sp) -; MIPS32R5EB-NEXT: lbu $1, 37($sp) +; MIPS32R5EB-NEXT: sw $5, 48($sp) +; MIPS32R5EB-NEXT: sw $4, 52($sp) +; MIPS32R5EB-NEXT: lbu $1, 49($sp) ; MIPS32R5EB-NEXT: sw $1, 28($sp) -; MIPS32R5EB-NEXT: lbu $1, 36($sp) +; MIPS32R5EB-NEXT: lbu $1, 48($sp) ; MIPS32R5EB-NEXT: sw $1, 20($sp) -; MIPS32R5EB-NEXT: lbu $1, 41($sp) +; MIPS32R5EB-NEXT: lbu $1, 53($sp) ; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lbu $1, 40($sp) +; MIPS32R5EB-NEXT: lbu $1, 52($sp) ; MIPS32R5EB-NEXT: sw $1, 4($sp) ; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) ; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) @@ -74,12 +76,13 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sb $2, 33($sp) -; MIPS32R5EB-NEXT: sb $1, 32($sp) -; MIPS32R5EB-NEXT: lhu $2, 32($sp) +; MIPS32R5EB-NEXT: sb $2, 45($sp) +; MIPS32R5EB-NEXT: sb $1, 
44($sp) +; MIPS32R5EB-NEXT: lhu $2, 44($sp) ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 64 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; @@ -151,35 +154,38 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { ; ; MIPS32R5EL-LABEL: i8_2: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: addiu $sp, $sp, -64 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64 +; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $5, 36($sp) -; MIPS32R5EL-NEXT: sw $4, 40($sp) -; MIPS32R5EL-NEXT: lbu $1, 37($sp) +; MIPS32R5EL-NEXT: sw $5, 48($sp) +; MIPS32R5EL-NEXT: sw $4, 52($sp) +; MIPS32R5EL-NEXT: lbu $1, 49($sp) ; MIPS32R5EL-NEXT: sw $1, 24($sp) -; MIPS32R5EL-NEXT: lbu $1, 36($sp) +; MIPS32R5EL-NEXT: lbu $1, 48($sp) ; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: lbu $1, 41($sp) +; MIPS32R5EL-NEXT: lbu $1, 53($sp) ; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lbu $1, 40($sp) +; MIPS32R5EL-NEXT: lbu $1, 52($sp) ; MIPS32R5EL-NEXT: sw $1, 0($sp) ; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) ; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sb $2, 33($sp) -; MIPS32R5EL-NEXT: sb $1, 32($sp) -; MIPS32R5EL-NEXT: lhu $2, 32($sp) +; MIPS32R5EL-NEXT: sb 
$2, 45($sp) +; MIPS32R5EL-NEXT: sb $1, 44($sp) +; MIPS32R5EL-NEXT: lhu $2, 44($sp) ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 64 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop ; @@ -312,36 +318,38 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; MIPS32R5EB: # %bb.0: # %entry ; MIPS32R5EB-NEXT: addiu $sp, $sp, -144 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 144 -; MIPS32R5EB-NEXT: sw $fp, 140($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 136($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $5, 132($sp) -; MIPS32R5EB-NEXT: sw $4, 136($sp) -; MIPS32R5EB-NEXT: lbu $1, 133($sp) +; MIPS32R5EB-NEXT: sw $5, 128($sp) +; MIPS32R5EB-NEXT: sw $4, 132($sp) +; MIPS32R5EB-NEXT: lbu $1, 129($sp) ; MIPS32R5EB-NEXT: sw $1, 76($sp) -; MIPS32R5EB-NEXT: lbu $1, 132($sp) +; MIPS32R5EB-NEXT: lbu $1, 128($sp) ; MIPS32R5EB-NEXT: sw $1, 68($sp) -; MIPS32R5EB-NEXT: lbu $1, 137($sp) +; MIPS32R5EB-NEXT: lbu $1, 133($sp) ; MIPS32R5EB-NEXT: sw $1, 60($sp) -; MIPS32R5EB-NEXT: lbu $1, 136($sp) +; MIPS32R5EB-NEXT: lbu $1, 132($sp) ; MIPS32R5EB-NEXT: sw $1, 52($sp) ; MIPS32R5EB-NEXT: ld.d $w0, 64($sp) ; MIPS32R5EB-NEXT: ld.d $w1, 48($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EB-NEXT: sw $6, 128($sp) -; MIPS32R5EB-NEXT: lbu $1, 129($sp) +; MIPS32R5EB-NEXT: sw $6, 124($sp) +; MIPS32R5EB-NEXT: lbu $1, 125($sp) ; MIPS32R5EB-NEXT: sw $1, 92($sp) -; MIPS32R5EB-NEXT: lbu $1, 128($sp) +; 
MIPS32R5EB-NEXT: lbu $1, 124($sp) ; MIPS32R5EB-NEXT: sw $1, 84($sp) ; MIPS32R5EB-NEXT: ld.d $w1, 80($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EB-NEXT: sw $7, 124($sp) -; MIPS32R5EB-NEXT: lbu $1, 125($sp) +; MIPS32R5EB-NEXT: sw $7, 120($sp) +; MIPS32R5EB-NEXT: lbu $1, 121($sp) ; MIPS32R5EB-NEXT: sw $1, 108($sp) -; MIPS32R5EB-NEXT: lbu $1, 124($sp) +; MIPS32R5EB-NEXT: lbu $1, 120($sp) ; MIPS32R5EB-NEXT: sw $1, 100($sp) ; MIPS32R5EB-NEXT: ld.d $w1, 96($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 @@ -366,11 +374,12 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sb $2, 121($sp) -; MIPS32R5EB-NEXT: sb $1, 120($sp) -; MIPS32R5EB-NEXT: lhu $2, 120($sp) +; MIPS32R5EB-NEXT: sb $2, 117($sp) +; MIPS32R5EB-NEXT: sb $1, 116($sp) +; MIPS32R5EB-NEXT: lhu $2, 116($sp) ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 140($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 144 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -550,36 +559,38 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; MIPS32R5EL: # %bb.0: # %entry ; MIPS32R5EL-NEXT: addiu $sp, $sp, -144 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 144 -; MIPS32R5EL-NEXT: sw $fp, 140($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 140($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 136($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $5, 132($sp) -; MIPS32R5EL-NEXT: sw $4, 136($sp) -; 
MIPS32R5EL-NEXT: lbu $1, 133($sp) +; MIPS32R5EL-NEXT: sw $5, 128($sp) +; MIPS32R5EL-NEXT: sw $4, 132($sp) +; MIPS32R5EL-NEXT: lbu $1, 129($sp) ; MIPS32R5EL-NEXT: sw $1, 72($sp) -; MIPS32R5EL-NEXT: lbu $1, 132($sp) +; MIPS32R5EL-NEXT: lbu $1, 128($sp) ; MIPS32R5EL-NEXT: sw $1, 64($sp) -; MIPS32R5EL-NEXT: lbu $1, 137($sp) +; MIPS32R5EL-NEXT: lbu $1, 133($sp) ; MIPS32R5EL-NEXT: sw $1, 56($sp) -; MIPS32R5EL-NEXT: lbu $1, 136($sp) +; MIPS32R5EL-NEXT: lbu $1, 132($sp) ; MIPS32R5EL-NEXT: sw $1, 48($sp) ; MIPS32R5EL-NEXT: ld.d $w0, 64($sp) ; MIPS32R5EL-NEXT: ld.d $w1, 48($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 -; MIPS32R5EL-NEXT: sw $6, 128($sp) -; MIPS32R5EL-NEXT: lbu $1, 129($sp) +; MIPS32R5EL-NEXT: sw $6, 124($sp) +; MIPS32R5EL-NEXT: lbu $1, 125($sp) ; MIPS32R5EL-NEXT: sw $1, 88($sp) -; MIPS32R5EL-NEXT: lbu $1, 128($sp) +; MIPS32R5EL-NEXT: lbu $1, 124($sp) ; MIPS32R5EL-NEXT: sw $1, 80($sp) ; MIPS32R5EL-NEXT: ld.d $w1, 80($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 -; MIPS32R5EL-NEXT: sw $7, 124($sp) -; MIPS32R5EL-NEXT: lbu $1, 125($sp) +; MIPS32R5EL-NEXT: sw $7, 120($sp) +; MIPS32R5EL-NEXT: lbu $1, 121($sp) ; MIPS32R5EL-NEXT: sw $1, 104($sp) -; MIPS32R5EL-NEXT: lbu $1, 124($sp) +; MIPS32R5EL-NEXT: lbu $1, 120($sp) ; MIPS32R5EL-NEXT: sw $1, 96($sp) ; MIPS32R5EL-NEXT: ld.d $w1, 96($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 @@ -603,11 +614,12 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sb $2, 121($sp) -; MIPS32R5EL-NEXT: sb $1, 120($sp) -; MIPS32R5EL-NEXT: lhu $2, 120($sp) +; MIPS32R5EL-NEXT: sb $2, 117($sp) +; MIPS32R5EL-NEXT: sb $1, 116($sp) +; MIPS32R5EL-NEXT: lhu $2, 116($sp) ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 140($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 136($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 140($sp) # 4-byte Folded Reload ; 
MIPS32R5EL-NEXT: addiu $sp, $sp, 144 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -952,8 +964,10 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -1019,7 +1033,8 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -1088,8 +1103,10 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -1155,7 +1172,8 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded 
Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -1471,23 +1489,25 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { ; ; MIPS32R5EB-LABEL: i16_2: ; MIPS32R5EB: # %bb.0: -; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 -; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: addiu $sp, $sp, -64 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 64 +; MIPS32R5EB-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 ; MIPS32R5EB-NEXT: and $sp, $sp, $1 -; MIPS32R5EB-NEXT: sw $5, 36($sp) -; MIPS32R5EB-NEXT: sw $4, 40($sp) -; MIPS32R5EB-NEXT: lhu $1, 38($sp) +; MIPS32R5EB-NEXT: sw $5, 48($sp) +; MIPS32R5EB-NEXT: sw $4, 52($sp) +; MIPS32R5EB-NEXT: lhu $1, 50($sp) ; MIPS32R5EB-NEXT: sw $1, 28($sp) -; MIPS32R5EB-NEXT: lhu $1, 36($sp) +; MIPS32R5EB-NEXT: lhu $1, 48($sp) ; MIPS32R5EB-NEXT: sw $1, 20($sp) -; MIPS32R5EB-NEXT: lhu $1, 42($sp) +; MIPS32R5EB-NEXT: lhu $1, 54($sp) ; MIPS32R5EB-NEXT: sw $1, 12($sp) -; MIPS32R5EB-NEXT: lhu $1, 40($sp) +; MIPS32R5EB-NEXT: lhu $1, 52($sp) ; MIPS32R5EB-NEXT: sw $1, 4($sp) ; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) ; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) @@ -1495,12 +1515,13 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { ; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3] -; MIPS32R5EB-NEXT: sh $2, 34($sp) -; MIPS32R5EB-NEXT: sh $1, 32($sp) -; MIPS32R5EB-NEXT: lw $2, 32($sp) +; MIPS32R5EB-NEXT: sh $2, 46($sp) +; MIPS32R5EB-NEXT: sh $1, 44($sp) +; MIPS32R5EB-NEXT: lw $2, 44($sp) ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 
44($sp) # 4-byte Folded Reload -; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EB-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 64 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop ; @@ -1532,35 +1553,38 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { ; ; MIPS32R5EL-LABEL: i16_2: ; MIPS32R5EL: # %bb.0: -; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 -; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: addiu $sp, $sp, -64 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 64 +; MIPS32R5EL-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 ; MIPS32R5EL-NEXT: and $sp, $sp, $1 -; MIPS32R5EL-NEXT: sw $5, 36($sp) -; MIPS32R5EL-NEXT: sw $4, 40($sp) -; MIPS32R5EL-NEXT: lhu $1, 38($sp) +; MIPS32R5EL-NEXT: sw $5, 48($sp) +; MIPS32R5EL-NEXT: sw $4, 52($sp) +; MIPS32R5EL-NEXT: lhu $1, 50($sp) ; MIPS32R5EL-NEXT: sw $1, 24($sp) -; MIPS32R5EL-NEXT: lhu $1, 36($sp) +; MIPS32R5EL-NEXT: lhu $1, 48($sp) ; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: lhu $1, 42($sp) +; MIPS32R5EL-NEXT: lhu $1, 54($sp) ; MIPS32R5EL-NEXT: sw $1, 8($sp) -; MIPS32R5EL-NEXT: lhu $1, 40($sp) +; MIPS32R5EL-NEXT: lhu $1, 52($sp) ; MIPS32R5EL-NEXT: sw $1, 0($sp) ; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) ; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] -; MIPS32R5EL-NEXT: sh $2, 34($sp) -; MIPS32R5EL-NEXT: sh $1, 32($sp) -; MIPS32R5EL-NEXT: lw $2, 32($sp) +; MIPS32R5EL-NEXT: sh $2, 46($sp) +; MIPS32R5EL-NEXT: sh $1, 44($sp) +; MIPS32R5EL-NEXT: lw $2, 44($sp) ; MIPS32R5EL-NEXT: 
move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload -; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EL-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 64 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop %1 = add <2 x i16> %a, %b @@ -1622,8 +1646,10 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -1665,7 +1691,8 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -1710,8 +1737,10 @@ define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -1753,7 +1782,8 @@ 
define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -1962,8 +1992,10 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -1979,7 +2011,8 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -2010,8 +2043,10 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: 
.cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -2026,7 +2061,8 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -2312,8 +2348,10 @@ define void @float_2(<2 x float> %a, <2 x float> %b) { ; MIPS32R5: # %bb.0: ; MIPS32R5-NEXT: addiu $sp, $sp, -48 ; MIPS32R5-NEXT: .cfi_def_cfa_offset 48 -; MIPS32R5-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill -; MIPS32R5-NEXT: .cfi_offset 30, -4 +; MIPS32R5-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5-NEXT: .cfi_offset 31, -4 +; MIPS32R5-NEXT: .cfi_offset 30, -8 ; MIPS32R5-NEXT: move $fp, $sp ; MIPS32R5-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5-NEXT: addiu $1, $zero, -16 @@ -2331,7 +2369,8 @@ define void @float_2(<2 x float> %a, <2 x float> %b) { ; MIPS32R5-NEXT: swc1 $f1, 4($2) ; MIPS32R5-NEXT: swc1 $f0, %lo(float_res_v2f32)($1) ; MIPS32R5-NEXT: move $sp, $fp -; MIPS32R5-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload +; MIPS32R5-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload ; MIPS32R5-NEXT: addiu $sp, $sp, 48 ; MIPS32R5-NEXT: jr $ra ; MIPS32R5-NEXT: nop @@ -2794,8 +2833,10 @@ define <8 x i8> @ret_8_i8() { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move 
$fp, $sp ; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -2810,7 +2851,8 @@ define <8 x i8> @ret_8_i8() { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -2829,8 +2871,10 @@ define <8 x i8> @ret_8_i8() { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -2845,7 +2889,8 @@ define <8 x i8> @ret_8_i8() { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -2965,8 +3010,10 @@ define <4 x i16> @ret_4_i16() { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; 
MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -2981,7 +3028,8 @@ define <4 x i16> @ret_4_i16() { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -3000,8 +3048,10 @@ define <4 x i16> @ret_4_i16() { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -3016,7 +3066,8 @@ define <4 x i16> @ret_4_i16() { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -3098,8 +3149,10 @@ define <2 x i32> @ret_2_i32() { ; MIPS32R5EB: # %bb.0: ; MIPS32R5EB-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EB-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EB-NEXT: .cfi_offset 30, -4 +; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 ; MIPS32R5EB-NEXT: move $fp, $sp ; 
MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EB-NEXT: addiu $1, $zero, -16 @@ -3114,7 +3167,8 @@ define <2 x i32> @ret_2_i32() { ; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] ; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] ; MIPS32R5EB-NEXT: move $sp, $fp -; MIPS32R5EB-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EB-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EB-NEXT: jr $ra ; MIPS32R5EB-NEXT: nop @@ -3133,8 +3187,10 @@ define <2 x i32> @ret_2_i32() { ; MIPS32R5EL: # %bb.0: ; MIPS32R5EL-NEXT: addiu $sp, $sp, -32 ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 -; MIPS32R5EL-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; MIPS32R5EL-NEXT: .cfi_offset 30, -4 +; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 ; MIPS32R5EL-NEXT: move $fp, $sp ; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5EL-NEXT: addiu $1, $zero, -16 @@ -3149,7 +3205,8 @@ define <2 x i32> @ret_2_i32() { ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] ; MIPS32R5EL-NEXT: move $sp, $fp -; MIPS32R5EL-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; MIPS32R5EL-NEXT: addiu $sp, $sp, 32 ; MIPS32R5EL-NEXT: jr $ra ; MIPS32R5EL-NEXT: nop @@ -6073,8 +6130,10 @@ define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { ; MIPS32R5: # %bb.0: # %entry ; MIPS32R5-NEXT: addiu $sp, $sp, -64 ; MIPS32R5-NEXT: .cfi_def_cfa_offset 64 -; MIPS32R5-NEXT: sw $fp, 60($sp) # 4-byte Folded Spill -; MIPS32R5-NEXT: .cfi_offset 30, -4 +; MIPS32R5-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; MIPS32R5-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; MIPS32R5-NEXT: .cfi_offset 31, -4 +; MIPS32R5-NEXT: .cfi_offset 30, -8 ; MIPS32R5-NEXT: 
move $fp, $sp ; MIPS32R5-NEXT: .cfi_def_cfa_register 30 ; MIPS32R5-NEXT: addiu $1, $zero, -16 @@ -6098,7 +6157,8 @@ define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { ; MIPS32R5-NEXT: splati.w $w1, $w0[1] ; MIPS32R5-NEXT: add.s $f0, $f0, $f1 ; MIPS32R5-NEXT: move $sp, $fp -; MIPS32R5-NEXT: lw $fp, 60($sp) # 4-byte Folded Reload +; MIPS32R5-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; MIPS32R5-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload ; MIPS32R5-NEXT: addiu $sp, $sp, 64 ; MIPS32R5-NEXT: jr $ra ; MIPS32R5-NEXT: nop diff --git a/llvm/test/CodeGen/Mips/dynamic-stack-realignment.ll b/llvm/test/CodeGen/Mips/dynamic-stack-realignment.ll index 5054b9cd02f47..50acd8ffb1eb1 100644 --- a/llvm/test/CodeGen/Mips/dynamic-stack-realignment.ll +++ b/llvm/test/CodeGen/Mips/dynamic-stack-realignment.ll @@ -163,8 +163,9 @@ entry: ; GP32-M: addiu $sp, $sp, -1024 ; GP32-MMR2: addiusp -1024 ; GP32-MMR6: addiu $sp, $sp, -1024 - ; GP32: sw $fp, 1020($sp) - ; GP32: sw $23, 1016($sp) + ; GP32: sw $ra, 1020($sp) + ; GP32: sw $fp, 1016($sp) + ; GP32: sw $23, 1012($sp) ; ; GP32: move $fp, $sp ; GP32: addiu $[[T0:[0-9]+|gp]], $zero, -512 @@ -177,8 +178,9 @@ entry: ; epilogue ; GP32: move $sp, $fp - ; GP32: lw $23, 1016($sp) - ; GP32: lw $fp, 1020($sp) + ; GP32: lw $23, 1012($sp) + ; GP32: lw $fp, 1016($sp) + ; GP32: lw $ra, 1020($sp) ; GP32-M: addiu $sp, $sp, 1024 ; GP32-MMR2: addiusp 1024 ; GP32-MMR6: addiu $sp, $sp, 1024 @@ -201,8 +203,9 @@ entry: ; FIXME: We are currently over-allocating stack space. 
; N32: addiu $sp, $sp, -1024 ; N64: daddiu $sp, $sp, -1024 - ; GP64: sd $fp, 1016($sp) - ; GP64: sd $23, 1008($sp) + ; GP64: sd $ra, 1016($sp) + ; GP64: sd $fp, 1008($sp) + ; GP64: sd $23, 1000($sp) ; ; GP64: move $fp, $sp ; GP64: addiu $[[T0:[0-9]+|gp]], $zero, -512 @@ -215,8 +218,9 @@ entry: ; epilogue ; GP64: move $sp, $fp - ; GP64: ld $23, 1008($sp) - ; GP64: ld $fp, 1016($sp) + ; GP64: ld $23, 1000($sp) + ; GP64: ld $fp, 1008($sp) + ; GP64: ld $ra, 1016($sp) ; N32: addiu $sp, $sp, 1024 ; N64: daddiu $sp, $sp, 1024 diff --git a/llvm/test/CodeGen/Mips/frame-address.ll b/llvm/test/CodeGen/Mips/frame-address.ll index 0ab7da30e785f..f7ceb575c65ae 100644 --- a/llvm/test/CodeGen/Mips/frame-address.ll +++ b/llvm/test/CodeGen/Mips/frame-address.ll @@ -1,17 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel < %s | FileCheck %s declare i8* @llvm.frameaddress(i32) nounwind readnone define i8* @f() nounwind uwtable { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -8 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: sw $ra, 4($sp) # 4-byte Folded Spill +; CHECK-NEXT: sw $fp, 0($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: .cfi_offset 30, -8 +; CHECK-NEXT: move $fp, $sp +; CHECK-NEXT: .cfi_def_cfa_register 30 +; CHECK-NEXT: move $2, $fp +; CHECK-NEXT: move $sp, $fp +; CHECK-NEXT: lw $fp, 0($sp) # 4-byte Folded Reload +; CHECK-NEXT: lw $ra, 4($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 8 entry: %0 = call i8* @llvm.frameaddress(i32 0) ret i8* %0 - -; CHECK: .cfi_startproc -; CHECK: .cfi_def_cfa_offset 8 -; CHECK: .cfi_offset 30, -4 -; CHECK: move $fp, $sp -; CHECK: .cfi_def_cfa_register 30 -; CHECK: move $2, $fp -; CHECK: .cfi_endproc } diff --git a/llvm/test/CodeGen/Mips/no-frame-pointer-elim.ll b/llvm/test/CodeGen/Mips/no-frame-pointer-elim.ll new file mode 100644 index 0000000000000..5242ff4612976 --- /dev/null +++ 
b/llvm/test/CodeGen/Mips/no-frame-pointer-elim.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mips64 -relocation-model=static < %s \ +; RUN: | FileCheck %s --check-prefix STATIC +; RUN: llc -march=mips64 -relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefix PIC + +declare dso_local void @callee() noreturn nounwind + +define dso_local void @caller() nounwind "no-frame-pointer-elim-non-leaf" { +; STATIC-LABEL: caller: +; STATIC: # %bb.0: # %entry +; STATIC-NEXT: daddiu $sp, $sp, -16 +; STATIC-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; STATIC-NEXT: sd $fp, 0($sp) # 8-byte Folded Spill +; STATIC-NEXT: move $fp, $sp +; STATIC-NEXT: jal callee +; STATIC-NEXT: nop +; +; PIC-LABEL: caller: +; PIC: # %bb.0: # %entry +; PIC-NEXT: daddiu $sp, $sp, -32 +; PIC-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill +; PIC-NEXT: sd $fp, 16($sp) # 8-byte Folded Spill +; PIC-NEXT: sd $gp, 8($sp) # 8-byte Folded Spill +; PIC-NEXT: move $fp, $sp +; PIC-NEXT: lui $1, %hi(%neg(%gp_rel(caller))) +; PIC-NEXT: daddu $1, $1, $25 +; PIC-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(caller))) +; PIC-NEXT: ld $25, %call16(callee)($gp) +; PIC-NEXT: .reloc .Ltmp0, R_MIPS_JALR, callee +; PIC-NEXT: .Ltmp0: +; PIC-NEXT: jalr $25 +; PIC-NEXT: nop +entry: + tail call void @callee() + unreachable +} diff --git a/llvm/test/CodeGen/Mips/tnaked.ll b/llvm/test/CodeGen/Mips/tnaked.ll index 7dff19c5d0009..e88396bac5080 100644 --- a/llvm/test/CodeGen/Mips/tnaked.ll +++ b/llvm/test/CodeGen/Mips/tnaked.ll @@ -21,7 +21,7 @@ entry: ; CHECK: .ent tnonaked ; CHECK-LABEL: tnonaked: ; CHECK: .frame $fp,8,$ra -; CHECK: .mask 0x40000000,-4 +; CHECK: .mask 0xc0000000,-4 ; CHECK: .fmask 0x00000000,0 ; CHECK: addiu $sp, $sp, -8 diff --git a/llvm/test/CodeGen/Mips/v2i16tof32.ll b/llvm/test/CodeGen/Mips/v2i16tof32.ll index 334413b03d58d..ab9fa9eefd413 100644 --- a/llvm/test/CodeGen/Mips/v2i16tof32.ll +++ b/llvm/test/CodeGen/Mips/v2i16tof32.ll @@ -9,8 
+9,10 @@ define float @f(<8 x i16>* %a) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addiu $sp, $sp, -32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: sw $fp, 28($sp) # 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 30, -4 +; CHECK-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; CHECK-NEXT: sw $fp, 24($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: .cfi_offset 30, -8 ; CHECK-NEXT: move $fp, $sp ; CHECK-NEXT: .cfi_def_cfa_register 30 ; CHECK-NEXT: addiu $1, $zero, -16 @@ -25,7 +27,8 @@ define float @f(<8 x i16>* %a) { ; CHECK-NEXT: sw $1, 4($sp) ; CHECK-NEXT: mtc1 $2, $f0 ; CHECK-NEXT: move $sp, $fp -; CHECK-NEXT: lw $fp, 28($sp) # 4-byte Folded Reload +; CHECK-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload +; CHECK-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload ; CHECK-NEXT: jr $ra ; CHECK-NEXT: addiu $sp, $sp, 32 entry: From a6a70415c85056ffd3f0b8506a67089365b0322f Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Mon, 7 Oct 2019 14:10:21 +0000 Subject: [PATCH 112/254] Regenerate ptr-rotate.ll . NFC llvm-svn: 373908 --- llvm/test/CodeGen/X86/ptr-rotate.ll | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/ptr-rotate.ll b/llvm/test/CodeGen/X86/ptr-rotate.ll index fbd13b5036447..c2365c607d069 100644 --- a/llvm/test/CodeGen/X86/ptr-rotate.ll +++ b/llvm/test/CodeGen/X86/ptr-rotate.ll @@ -1,11 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i386-apple-darwin -mcpu=corei7 -o - < %s | FileCheck %s define i32 @func(i8* %A) nounwind readnone { +; CHECK-LABEL: func: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: roll $27, %eax +; CHECK-NEXT: retl entry: %tmp = ptrtoint i8* %A to i32 %shr = lshr i32 %tmp, 5 %shl = shl i32 %tmp, 27 %or = or i32 %shr, %shl -; CHECK: roll $27 ret i32 %or } From 9f4de84eb0e0f69de66e5fdf99b63678264f3726 Mon Sep 17 00:00:00 2001 From: "Kevin P. 
Neal" Date: Mon, 7 Oct 2019 14:14:46 +0000 Subject: [PATCH 113/254] Fix another sphinx warning. Differential Revision: https://reviews.llvm.org/D64746 llvm-svn: 373909 --- llvm/docs/LangRef.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a5710e2cbbad8..d9a38907c920a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16259,7 +16259,7 @@ would and handles error conditions in the same way. '``llvm.experimental.constrained.lround``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -16297,7 +16297,7 @@ would and handles error conditions in the same way. '``llvm.experimental.constrained.llround``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" From edf5027689c5b63c94262c17a7b8a87de9c55fb1 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Mon, 7 Oct 2019 14:20:46 +0000 Subject: [PATCH 114/254] [clang] Add test for FindNextToken in Lexer. 
Reviewers: ilya-biryukov Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68565 llvm-svn: 373910 --- clang/unittests/Lex/LexerTest.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 2295a901008bf..ad9010e5a8b71 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -11,9 +11,11 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/MacroArgs.h" @@ -21,11 +23,13 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" - -using namespace clang; +#include namespace { +using namespace clang; +using testing::ElementsAre; // The test fixture. class LexerTest : public ::testing::Test { @@ -535,4 +539,21 @@ TEST_F(LexerTest, CharRangeOffByOne) { EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO". 
} +TEST_F(LexerTest, FindNextToken) { + Lex("int abcd = 0;\n" + "int xyz = abcd;\n"); + std::vector GeneratedByNextToken; + SourceLocation Loc = + SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); + while (true) { + auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); + ASSERT_TRUE(T.hasValue()); + if (T->is(tok::eof)) + break; + GeneratedByNextToken.push_back(getSourceText(*T, *T)); + Loc = T->getLocation(); + } + EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", + "xyz", "=", "abcd", ";")); +} } // anonymous namespace From dc4d908d6ebdee57f65e5b82bf598f45439e8f76 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Oct 2019 14:25:46 +0000 Subject: [PATCH 115/254] Sema - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but in these cases we should be able to use castAs<> directly and if not assert will fire for us. llvm-svn: 373911 --- clang/lib/Sema/SemaAccess.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 14 +++++++------- clang/lib/Sema/SemaCoroutine.cpp | 2 +- clang/lib/Sema/SemaDecl.cpp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/lib/Sema/SemaAccess.cpp b/clang/lib/Sema/SemaAccess.cpp index b20f279dcad28..9dbb93322b7d4 100644 --- a/clang/lib/Sema/SemaAccess.cpp +++ b/clang/lib/Sema/SemaAccess.cpp @@ -1551,7 +1551,7 @@ Sema::AccessResult Sema::CheckUnresolvedMemberAccess(UnresolvedMemberExpr *E, QualType BaseType = E->getBaseType(); if (E->isArrow()) - BaseType = BaseType->getAs()->getPointeeType(); + BaseType = BaseType->castAs()->getPointeeType(); AccessTarget Entity(Context, AccessTarget::Member, E->getNamingClass(), Found, BaseType); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e65ad94d2daeb..eeddff6c7144a 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -484,7 +484,7 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr 
*BlockArg) { const BlockPointerType *BPT = cast(BlockArg->getType().getCanonicalType()); ArrayRef Params = - BPT->getPointeeType()->getAs()->getParamTypes(); + BPT->getPointeeType()->castAs()->getParamTypes(); unsigned ArgCounter = 0; bool IllegalParams = false; // Iterate through the block parameters until either one is found that is not @@ -583,7 +583,7 @@ static bool checkOpenCLEnqueueVariadicArgs(Sema &S, CallExpr *TheCall, const BlockPointerType *BPT = cast(BlockArg->getType().getCanonicalType()); unsigned NumBlockParams = - BPT->getPointeeType()->getAs()->getNumParams(); + BPT->getPointeeType()->castAs()->getNumParams(); unsigned TotalNumArgs = TheCall->getNumArgs(); // For each argument passed to the block, a corresponding uint needs to @@ -676,7 +676,7 @@ static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) { // we have a block type, check the prototype const BlockPointerType *BPT = cast(Arg3->getType().getCanonicalType()); - if (BPT->getPointeeType()->getAs()->getNumParams() > 0) { + if (BPT->getPointeeType()->castAs()->getNumParams() > 0) { S.Diag(Arg3->getBeginLoc(), diag::err_opencl_enqueue_kernel_blocks_no_args); return true; @@ -4664,7 +4664,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, << Ptr->getSourceRange(); return ExprError(); } - ValType = AtomTy->getAs()->getValueType(); + ValType = AtomTy->castAs()->getValueType(); } else if (Form != Load && Form != LoadCopy) { if (ValType.isConstQualified()) { Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_non_const_pointer) @@ -5473,7 +5473,7 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) { if (IsX64 || IsAArch64) { CallingConv CC = CC_C; if (const FunctionDecl *FD = S.getCurFunctionDecl()) - CC = FD->getType()->getAs()->getCallConv(); + CC = FD->getType()->castAs()->getCallConv(); if (IsMSVAStart) { // Don't allow this in System V ABI functions. 
if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_Win64)) @@ -5603,7 +5603,7 @@ bool Sema::SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) { return false; if (!Type->isEnumeralType()) return true; - const EnumDecl *ED = Type->getAs()->getDecl(); + const EnumDecl *ED = Type->castAs()->getDecl(); return !(ED && Context.typesAreCompatible(ED->getPromotionType(), Type)); }()) { @@ -10756,7 +10756,7 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init, return false; if (BitfieldType->isEnumeralType()) { - EnumDecl *BitfieldEnumDecl = BitfieldType->getAs()->getDecl(); + EnumDecl *BitfieldEnumDecl = BitfieldType->castAs()->getDecl(); // If the underlying enum type was not explicitly specified as an unsigned // type and the enum contain only positive values, MSVC++ will cause an // inconsistency by storing this as a signed type. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index f0347af6a1bb1..fd2fd35921cef 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -83,7 +83,7 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD, // ref-qualifier or with the & ref-qualifier // -- "rvalue reference to cv X" for functions declared with the && // ref-qualifier - QualType T = MD->getThisType()->getAs()->getPointeeType(); + QualType T = MD->getThisType()->castAs()->getPointeeType(); T = FnType->getRefQualifier() == RQ_RValue ? 
S.Context.getRValueReferenceType(T) : S.Context.getLValueReferenceType(T, /*SpelledAsLValue*/ true); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6114eb8e8d6e9..db89e54c51749 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2172,7 +2172,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, if (!T->isPointerType()) break; if (!T->isVoidPointerType()) { - QualType PT = T->getAs()->getPointeeType(); + QualType PT = T->castAs()->getPointeeType(); if (!PT->isStructureType()) break; } @@ -8152,7 +8152,7 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, // the class has been completely parsed. if (!DC->isRecord() && SemaRef.RequireNonAbstractType( - D.getIdentifierLoc(), R->getAs()->getReturnType(), + D.getIdentifierLoc(), R->castAs()->getReturnType(), diag::err_abstract_type_in_decl, SemaRef.AbstractReturnType)) D.setInvalidType(); From a14ffc7eb741de4fd7484350d11947dea40991fd Mon Sep 17 00:00:00 2001 From: David Greene Date: Mon, 7 Oct 2019 14:37:20 +0000 Subject: [PATCH 116/254] Allow update_test_checks.py to not scrub names. Add a --preserve-names option to tell the script not to replace IR names. Sometimes tests want those names. For example if a test is looking for a modification to an existing instruction we'll want to make the names. 
Differential Revision: https://reviews.llvm.org/D68081 llvm-svn: 373912 --- llvm/utils/UpdateTestChecks/common.py | 6 ++++-- llvm/utils/update_test_checks.py | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index ecb3a0f0a72e2..972b65505b45c 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -267,10 +267,12 @@ def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, output_lines.append(comment_marker) break -def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): +def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, + func_name, preserve_names): # Label format is based on IR string. check_label_format = '{} %s-LABEL: @%s('.format(comment_marker) - add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, False) + add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, + check_label_format, False, preserve_names) def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): check_label_format = '{} %s-LABEL: \'%s\''.format(comment_marker) diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index 5e0d4bac22f07..ec026022fc2f7 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -64,6 +64,8 @@ def main(): '--function', help='The function in the test file to update') parser.add_argument('-u', '--update-only', action='store_true', help='Only update test if it was already autogened') + parser.add_argument('-p', '--preserve-names', action='store_true', + help='Do not scrub IR names') parser.add_argument('tests', nargs='+') args = parser.parse_args() @@ -174,7 +176,8 @@ def main(): continue # Print out the various check lines here. 
- common.add_ir_checks(output_lines, ';', prefix_list, func_dict, func_name) + common.add_ir_checks(output_lines, ';', prefix_list, func_dict, + func_name, args.preserve_names) is_in_function_start = False if is_in_function: From b743f18b1f4a140b370b865a681bbed5ceaeab11 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Oct 2019 14:48:27 +0000 Subject: [PATCH 117/254] [LoopVectorize] add test that asserted after cost model change (PR43582); NFC llvm-svn: 373913 --- .../LoopVectorize/X86/cost-model-assert.ll | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll new file mode 100644 index 0000000000000..90f0ae4cd865c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +; This is a bugpoint reduction of a test from PR43582: +; https://bugs.llvm.org/show_bug.cgi?id=43582 + +; ...but it's over-simplifying the underlying question: +; TODO: Should this be vectorized rather than allowing the backend to load combine? +; The original code is a bswap pattern. 
+ +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-w64-windows-gnu" + +define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { +; CHECK-LABEL: @cff_index_load_offsets( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[P:%.*]], align 1, !tbaa !1 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[P]], align 1, !tbaa !1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[P]], align 1, !tbaa !1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[P]], align 1, !tbaa !1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> [[TMP8]], i8 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i8> [[TMP9]], i8 [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i8> [[TMP10]], i8 [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP12]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]] +; CHECK-NEXT: 
[[TMP15:%.*]] = load i8, i8* undef, align 1, !tbaa !1 +; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, !tbaa !1 +; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, !tbaa !1 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* undef, align 1, !tbaa !1 +; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 +; CHECK-NEXT: store i32 [[TMP21]], i32* undef, align 4, !tbaa !4 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 +; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, !tbaa !4 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 +; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, !tbaa !4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 +; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, !tbaa !4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 +; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] +; CHECK-NEXT: br label [[FOR_BODY68:%.*]] +; CHECK: for.body68: +; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 +; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 +; CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[P]], align 1, !tbaa !1 +; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP26]] to i32 +; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 +; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] +; CHECK-NEXT: 
[[TMP27:%.*]] = load i8, i8* undef, align 1, !tbaa !1 +; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 +; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] +; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32 +; CHECK-NEXT: [[OR83:%.*]] = or i32 [[OR79]], [[CONV81]] +; CHECK-NEXT: store i32 [[OR83]], i32* undef, align 4, !tbaa !4 +; CHECK-NEXT: [[ADD_PTR86]] = getelementptr inbounds i8, i8* [[P_359]], i64 4 +; CHECK-NEXT: [[CMP66:%.*]] = icmp ult i8* [[ADD_PTR86]], undef +; CHECK-NEXT: br i1 [[CMP66]], label [[FOR_BODY68]], label [[SW_EPILOG]], !llvm.loop !8 +; CHECK: sw.epilog: +; CHECK-NEXT: unreachable +; CHECK: Exit: +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %if.then, label %Exit + +if.then: ; preds = %entry + br label %for.body68 + +for.body68: ; preds = %for.body68, %if.then + %p.359 = phi i8* [ %add.ptr86, %for.body68 ], [ null, %if.then ] + %conv70 = zext i8 %x to i32 + %shl71 = shl nuw i32 %conv70, 24 + %0 = load i8, i8* %p, align 1, !tbaa !1 + %conv73 = zext i8 %0 to i32 + %shl74 = shl nuw nsw i32 %conv73, 16 + %or75 = or i32 %shl74, %shl71 + %1 = load i8, i8* undef, align 1, !tbaa !1 + %shl78 = shl nuw nsw i32 undef, 8 + %or79 = or i32 %or75, %shl78 + %conv81 = zext i8 undef to i32 + %or83 = or i32 %or79, %conv81 + store i32 %or83, i32* undef, align 4, !tbaa !4 + %add.ptr86 = getelementptr inbounds i8, i8* %p.359, i64 4 + %cmp66 = icmp ult i8* %add.ptr86, undef + br i1 %cmp66, label %for.body68, label %sw.epilog + +sw.epilog: ; preds = %for.body68 + unreachable + +Exit: ; preds = %entry + ret void +} + +attributes #0 = { "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project.git 0fedc26a0dc0066f3968b9fea6a4e1f746c8d5a4)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} +!4 = !{!5, !5, i64 0} +!5 = !{!"long", !2, i64 0} From b523790ae1b30a1708d2fc7937f90e283330ef33 Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Mon, 7 Oct 
2019 16:12:37 +0000 Subject: [PATCH 118/254] [SampleFDO] Add compression support for any section in ExtBinary profile format Previously ExtBinary profile format only supports compression using zlib for profile symbol list. In this patch, we extend the compression support to any section. User can select some or all of the sections to compress. In an experiment, for a 45M profile in ExtBinary format, compressing name table reduced its size to 24M, and compressing all the sections reduced its size to 11M. Differential Revision: https://reviews.llvm.org/D68253 llvm-svn: 373914 --- llvm/include/llvm/ProfileData/SampleProf.h | 20 ++- .../llvm/ProfileData/SampleProfReader.h | 10 +- .../llvm/ProfileData/SampleProfWriter.h | 38 ++++-- llvm/lib/ProfileData/SampleProf.cpp | 55 ++------ llvm/lib/ProfileData/SampleProfReader.cpp | 84 +++++++++--- llvm/lib/ProfileData/SampleProfWriter.cpp | 102 ++++++++++++--- .../compressed-profile-symbol-list.ll | 2 +- .../SampleProfile/profile-format-compress.ll | 123 ++++++++++++++++++ .../uncompressed-profile-symbol-list.ll | 2 +- .../profile-symbol-list-compress.test | 6 + .../llvm-profdata/roundtrip-compress.test | 10 ++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 47 +++++-- 12 files changed, 387 insertions(+), 112 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/profile-format-compress.ll create mode 100644 llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test create mode 100644 llvm/test/tools/llvm-profdata/roundtrip-compress.test diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 9ad740ed804eb..936ebcecfe96d 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -145,11 +145,25 @@ static inline std::string getSecName(SecType Type) { // and SampleProfileExtBinaryBaseWriter. 
struct SecHdrTableEntry { SecType Type; - uint64_t Flag; + uint64_t Flags; uint64_t Offset; uint64_t Size; }; +enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) }; + +static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags |= Flags; +} + +static inline void removeSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags &= ~Flags; +} + +static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) { + return Entry.Flags & Flag; +} + /// Represents the relative location of an instruction. /// /// Instruction locations are specified by the line offset from the @@ -643,9 +657,9 @@ class ProfileSymbolList { unsigned size() { return Syms.size(); } void setToCompress(bool TC) { ToCompress = TC; } + bool toCompress() { return ToCompress; } - std::error_code read(uint64_t CompressSize, uint64_t UncompressSize, - const uint8_t *Data); + std::error_code read(const uint8_t *Data, uint64_t ListSize); std::error_code write(raw_ostream &OS); void dump(raw_ostream &OS = dbgs()) const; diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 761dbde059569..424818bbb26df 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -488,6 +488,14 @@ class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { /// possible to define other types of profile inherited from /// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase. 
class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { +private: + std::error_code decompressSection(const uint8_t *SecStart, + const uint64_t SecSize, + const uint8_t *&DecompressBuf, + uint64_t &DecompressBufSize); + + BumpPtrAllocator Allocator; + protected: std::vector SecHdrTable; std::unique_ptr ProfSymList; @@ -518,7 +526,7 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, SecType Type) override; - std::error_code readProfileSymbolList(); + std::error_code readProfileSymbolList(uint64_t Size); public: SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index 35218e3879c4a..ae7ef2deaf059 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -143,14 +143,16 @@ class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; - public: virtual std::error_code write(const StringMap &ProfileMap) override; + void setToCompressAllSections(); + void setToCompressSection(SecType Type); + protected: - uint64_t markSectionStart(); - uint64_t addNewSection(SecType Sec, uint64_t SectionStart); + uint64_t markSectionStart(SecType Type); + std::error_code addNewSection(SecType Sec, uint64_t SectionStart); virtual void initSectionLayout() = 0; virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; @@ -158,34 +160,52 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { // Specifiy the section layout in the profile. 
Note that the order in // SecHdrTable (order to collect sections) may be different from the // order in SectionLayout (order to write out sections into profile). - SmallVector SectionLayout; + SmallVector SectionLayout; private: void allocSecHdrTable(); std::error_code writeSecHdrTable(); virtual std::error_code writeHeader(const StringMap &ProfileMap) override; - + void addSectionFlags(SecType Type, SecFlags Flags); + SecHdrTableEntry &getEntryInLayout(SecType Type); + std::error_code compressAndOutput(); + + // We will swap the raw_ostream held by LocalBufStream and that + // held by OutputStream if we try to add a section which needs + // compression. After the swap, all the data written to output + // will be temporarily buffered into the underlying raw_string_ostream + // originally held by LocalBufStream. After the data writing for the + // section is completed, compress the data in the local buffer, + // swap the raw_ostream back and write the compressed data to the + // real output. + std::unique_ptr LocalBufStream; // The location where the output stream starts. uint64_t FileStart; // The location in the output stream where the SecHdrTable should be // written to. uint64_t SecHdrTableOffset; + // Initial Section Flags setting. 
std::vector SecHdrTable; }; class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { - using SampleProfileWriterExtBinaryBase::SampleProfileWriterExtBinaryBase; - public: + SampleProfileWriterExtBinary(std::unique_ptr &OS) + : SampleProfileWriterExtBinaryBase(OS) { + initSectionLayout(); + } + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { ProfSymList = PSL; }; private: virtual void initSectionLayout() override { - SectionLayout = {SecProfSummary, SecNameTable, SecLBRProfile, - SecProfileSymbolList}; + SectionLayout = {{SecProfSummary}, + {SecNameTable}, + {SecLBRProfile}, + {SecProfileSymbolList}}; }; virtual std::error_code writeSections(const StringMap &ProfileMap) override; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index e94848df24e20..2150c5d5a70bb 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -15,7 +15,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -198,66 +197,34 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const { LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif -std::error_code ProfileSymbolList::read(uint64_t CompressSize, - uint64_t UncompressSize, - const uint8_t *Data) { +std::error_code ProfileSymbolList::read(const uint8_t *Data, + uint64_t ListSize) { const char *ListStart = reinterpret_cast(Data); - // CompressSize being non-zero means the profile is compressed and - // needs to be uncompressed first. 
- if (CompressSize) { - if (!llvm::zlib::isAvailable()) - return sampleprof_error::zlib_unavailable; - - StringRef CompressedStrings(reinterpret_cast(Data), - CompressSize); - char *Buffer = Allocator.Allocate(UncompressSize); - size_t UCSize = UncompressSize; - llvm::Error E = zlib::uncompress(CompressedStrings, Buffer, UCSize); - if (E) - return sampleprof_error::uncompress_failed; - ListStart = Buffer; - } - uint64_t Size = 0; - while (Size < UncompressSize) { + while (Size < ListSize) { StringRef Str(ListStart + Size); add(Str); Size += Str.size() + 1; } + if (Size != ListSize) + return sampleprof_error::malformed; return sampleprof_error::success; } std::error_code ProfileSymbolList::write(raw_ostream &OS) { - // Sort the symbols before doing compression. It will make the - // compression much more effective. + // Sort the symbols before output. If doing compression. + // It will make the compression much more effective. std::vector SortedList; SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); llvm::sort(SortedList); - std::string UncompressedStrings; + std::string OutputString; for (auto &Sym : SortedList) { - UncompressedStrings.append(Sym.str()); - UncompressedStrings.append(1, '\0'); + OutputString.append(Sym.str()); + OutputString.append(1, '\0'); } - if (ToCompress) { - if (!llvm::zlib::isAvailable()) - return sampleprof_error::zlib_unavailable; - SmallString<128> CompressedStrings; - llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings, - zlib::BestSizeCompression); - if (E) - return sampleprof_error::compress_failed; - encodeULEB128(UncompressedStrings.size(), OS); - encodeULEB128(CompressedStrings.size(), OS); - OS << CompressedStrings.str(); - } else { - encodeULEB128(UncompressedStrings.size(), OS); - // If profile symbol list is not compressed, we will still save - // a compressed size value, but the value of the size is 0. 
- encodeULEB128(0, OS); - OS << UncompressedStrings; - } + OS << OutputString; return sampleprof_error::success; } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 07272ebac0a9e..075bd9412b0c9 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" @@ -471,6 +472,7 @@ std::error_code SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, uint64_t Size, SecType Type) { Data = Start; + End = Start + Size; switch (Type) { case SecProfSummary: if (std::error_code EC = readSummary()) @@ -487,7 +489,7 @@ SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, } break; case SecProfileSymbolList: - if (std::error_code EC = readProfileSymbolList()) + if (std::error_code EC = readProfileSymbolList(Size)) return EC; break; default: @@ -496,27 +498,43 @@ SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, return sampleprof_error::success; } -std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() { - auto UncompressSize = readNumber(); - if (std::error_code EC = UncompressSize.getError()) - return EC; +std::error_code +SampleProfileReaderExtBinary::readProfileSymbolList(uint64_t Size) { + if (!ProfSymList) + ProfSymList = std::make_unique(); - auto CompressSize = readNumber(); - if (std::error_code EC = CompressSize.getError()) + if (std::error_code EC = ProfSymList->read(Data, Size)) return EC; - if (!ProfSymList) - ProfSymList = std::make_unique(); + Data = Data + Size; + return sampleprof_error::success; +} - if (std::error_code EC = - ProfSymList->read(*CompressSize, *UncompressSize, Data)) +std::error_code 
SampleProfileReaderExtBinaryBase::decompressSection( + const uint8_t *SecStart, const uint64_t SecSize, + const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { + Data = SecStart; + End = SecStart + SecSize; + auto DecompressSize = readNumber(); + if (std::error_code EC = DecompressSize.getError()) return EC; + DecompressBufSize = *DecompressSize; - // CompressSize is zero only when ProfileSymbolList is not compressed. - if (*CompressSize == 0) - Data = Data + *UncompressSize; - else - Data = Data + *CompressSize; + auto CompressSize = readNumber(); + if (std::error_code EC = CompressSize.getError()) + return EC; + + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + + StringRef CompressedStrings(reinterpret_cast(Data), + *CompressSize); + char *Buffer = Allocator.Allocate(DecompressBufSize); + llvm::Error E = + zlib::uncompress(CompressedStrings, Buffer, DecompressBufSize); + if (E) + return sampleprof_error::uncompress_failed; + DecompressBuf = reinterpret_cast(Buffer); return sampleprof_error::success; } @@ -528,11 +546,35 @@ std::error_code SampleProfileReaderExtBinaryBase::read() { // Skip empty section. if (!Entry.Size) continue; + const uint8_t *SecStart = BufStart + Entry.Offset; - if (std::error_code EC = readOneSection(SecStart, Entry.Size, Entry.Type)) + uint64_t SecSize = Entry.Size; + + // If the section is compressed, decompress it into a buffer + // DecompressBuf before reading the actual data. The pointee of + // 'Data' will be changed to buffer hold by DecompressBuf + // temporarily when reading the actual data. 
+ bool isCompressed = hasSecFlag(Entry, SecFlagCompress); + if (isCompressed) { + const uint8_t *DecompressBuf; + uint64_t DecompressBufSize; + if (std::error_code EC = decompressSection( + SecStart, SecSize, DecompressBuf, DecompressBufSize)) + return EC; + SecStart = DecompressBuf; + SecSize = DecompressBufSize; + } + + if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type)) return EC; - if (Data != SecStart + Entry.Size) + if (Data != SecStart + SecSize) return sampleprof_error::malformed; + + // Change the pointee of 'Data' from DecompressBuf to original Buffer. + if (isCompressed) { + Data = BufStart + Entry.Offset; + End = BufStart + Buffer->getBufferSize(); + } } return sampleprof_error::success; @@ -621,10 +663,10 @@ std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() { return EC; Entry.Type = static_cast(*Type); - auto Flag = readUnencodedNumber(); - if (std::error_code EC = Flag.getError()) + auto Flags = readUnencodedNumber(); + if (std::error_code EC = Flags.getError()) return EC; - Entry.Flag = *Flag; + Entry.Flags = *Flags; auto Offset = readUnencodedNumber(); if (std::error_code EC = Offset.getError()) diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 068ce5bf959cd..03446367665bd 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorOr.h" @@ -72,21 +73,58 @@ SampleProfileWriter::write(const StringMap &ProfileMap) { return sampleprof_error::success; } +SecHdrTableEntry & +SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) { + auto SecIt = std::find_if( + SectionLayout.begin(), SectionLayout.end(), + [=](const auto &Entry) -> bool 
{ return Entry.Type == Type; }); + return *SecIt; +} + /// Return the current position and prepare to use it as the start /// position of a section. -uint64_t SampleProfileWriterExtBinaryBase::markSectionStart() { - return OutputStream->tell(); +uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) { + uint64_t SectionStart = OutputStream->tell(); + auto &Entry = getEntryInLayout(Type); + // Use LocalBuf as a temporary output for writting data. + if (hasSecFlag(Entry, SecFlagCompress)) + LocalBufStream.swap(OutputStream); + return SectionStart; +} + +std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() { + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + std::string &UncompressedStrings = + static_cast(LocalBufStream.get())->str(); + if (UncompressedStrings.size() == 0) + return sampleprof_error::success; + auto &OS = *OutputStream; + SmallString<128> CompressedStrings; + llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings, + zlib::BestSizeCompression); + if (E) + return sampleprof_error::compress_failed; + encodeULEB128(UncompressedStrings.size(), OS); + encodeULEB128(CompressedStrings.size(), OS); + OS << CompressedStrings.str(); + UncompressedStrings.clear(); + return sampleprof_error::success; } -/// Add a new section into section header table. Return the position -/// of SectionEnd. -uint64_t -SampleProfileWriterExtBinaryBase::addNewSection(SecType Sec, +/// Add a new section into section header table. 
+std::error_code +SampleProfileWriterExtBinaryBase::addNewSection(SecType Type, uint64_t SectionStart) { - uint64_t SectionEnd = OutputStream->tell(); - SecHdrTable.push_back( - {Sec, 0, SectionStart - FileStart, SectionEnd - SectionStart}); - return SectionEnd; + auto Entry = getEntryInLayout(Type); + if (hasSecFlag(Entry, SecFlagCompress)) { + LocalBufStream.swap(OutputStream); + if (std::error_code EC = compressAndOutput()) + return EC; + } + SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart, + OutputStream->tell() - SectionStart}); + return sampleprof_error::success; } std::error_code SampleProfileWriterExtBinaryBase::write( @@ -94,6 +132,8 @@ std::error_code SampleProfileWriterExtBinaryBase::write( if (std::error_code EC = writeHeader(ProfileMap)) return EC; + std::string LocalBuf; + LocalBufStream = std::make_unique(LocalBuf); if (std::error_code EC = writeSections(ProfileMap)) return EC; @@ -105,28 +145,38 @@ std::error_code SampleProfileWriterExtBinaryBase::write( std::error_code SampleProfileWriterExtBinary::writeSections( const StringMap &ProfileMap) { - uint64_t SectionStart = markSectionStart(); + uint64_t SectionStart = markSectionStart(SecProfSummary); computeSummary(ProfileMap); if (auto EC = writeSummary()) return EC; - SectionStart = addNewSection(SecProfSummary, SectionStart); + if (std::error_code EC = addNewSection(SecProfSummary, SectionStart)) + return EC; // Generate the name table for all the functions referenced in the profile. 
+ SectionStart = markSectionStart(SecNameTable); for (const auto &I : ProfileMap) { addName(I.first()); addNames(I.second); } writeNameTable(); - SectionStart = addNewSection(SecNameTable, SectionStart); + if (std::error_code EC = addNewSection(SecNameTable, SectionStart)) + return EC; + SectionStart = markSectionStart(SecLBRProfile); if (std::error_code EC = writeFuncProfiles(ProfileMap)) return EC; - SectionStart = addNewSection(SecLBRProfile, SectionStart); + if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart)) + return EC; + + if (ProfSymList && ProfSymList->toCompress()) + setToCompressSection(SecProfileSymbolList); + SectionStart = markSectionStart(SecProfileSymbolList); if (ProfSymList && ProfSymList->size() > 0) if (std::error_code EC = ProfSymList->write(*OutputStream)) return EC; - addNewSection(SecProfileSymbolList, SectionStart); + if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart)) + return EC; return sampleprof_error::success; } @@ -308,6 +358,23 @@ std::error_code SampleProfileWriterBinary::writeHeader( return sampleprof_error::success; } +void SampleProfileWriterExtBinaryBase::setToCompressAllSections() { + for (auto &Entry : SectionLayout) + addSecFlags(Entry, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) { + addSectionFlags(Type, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type, + SecFlags Flags) { + for (auto &Entry : SectionLayout) { + if (Entry.Type == Type) + addSecFlags(Entry, Flags); + } +} + void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { support::endian::Writer Writer(*OutputStream, support::little); @@ -342,9 +409,9 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { // to adjust the order in SecHdrTable to be consistent with // SectionLayout when we write SecHdrTable to the memory. 
for (uint32_t i = 0; i < SectionLayout.size(); i++) { - uint32_t idx = IndexMap[static_cast(SectionLayout[i])]; + uint32_t idx = IndexMap[static_cast(SectionLayout[i].Type)]; Writer.write(static_cast(SecHdrTable[idx].Type)); - Writer.write(static_cast(SecHdrTable[idx].Flag)); + Writer.write(static_cast(SecHdrTable[idx].Flags)); Writer.write(static_cast(SecHdrTable[idx].Offset)); Writer.write(static_cast(SecHdrTable[idx].Size)); } @@ -362,7 +429,6 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader( FileStart = OS.tell(); writeMagicIdent(Format); - initSectionLayout(); allocSecHdrTable(); return sampleprof_error::success; } diff --git a/llvm/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll b/llvm/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll index 6626567722993..6ac62c1701b87 100644 --- a/llvm/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll +++ b/llvm/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll @@ -1,5 +1,5 @@ ; REQUIRES: zlib ; Append inline.prof with profile symbol list and save it after compression. 
-; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=true --extbinary %S/Inputs/inline.prof --output=%t.profdata +; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-all-sections=true --extbinary %S/Inputs/inline.prof --output=%t.profdata ; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll ; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll diff --git a/llvm/test/Transforms/SampleProfile/profile-format-compress.ll b/llvm/test/Transforms/SampleProfile/profile-format-compress.ll new file mode 100644 index 0000000000000..be4eae3cddcb0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-format-compress.ll @@ -0,0 +1,123 @@ +; REQUIRES: zlib +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections %S/Inputs/inline.prof -o %t.compress.extbinary.afdo +; RUN: opt < %s -sample-profile -sample-profile-file=%t.compress.extbinary.afdo -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.compress.extbinary.afdo -S | FileCheck %s + +; Original C++ test case +; +; #include +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } +; +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Check sample-profile phase using compressed extbinary format profile +; will annotate the IR with exactly the same result 
as using text format. +; CHECK: br i1 %cmp, label %while.body, label %while.end{{.*}} !prof ![[IDX1:[0-9]*]] +; CHECK: br i1 %cmp1, label %if.then, label %if.else{{.*}} !prof ![[IDX2:[0-9]*]] +; CHECK: call i32 (i8*, ...) @printf{{.*}} !prof ![[IDX3:[0-9]*]] +; CHECK: = !{!"TotalCount", i64 26781} +; CHECK: = !{!"MaxCount", i64 5553} +; CHECK: ![[IDX1]] = !{!"branch_weights", i32 5392, i32 163} +; CHECK: ![[IDX2]] = !{!"branch_weights", i32 5280, i32 113} +; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1} + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, 
%if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, 
scope: !7) +!25 = !DILocation(line: 12, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll b/llvm/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll index abe562d7ebbe3..5eaf4b279d73e 100644 --- a/llvm/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll +++ b/llvm/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll @@ -1,4 +1,4 @@ ; Append inline.prof with profile symbol list and save it without compression. -; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=false --extbinary %S/Inputs/inline.prof --output=%t.profdata +; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-all-sections=false --extbinary %S/Inputs/inline.prof --output=%t.profdata ; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll ; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll diff --git a/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test new file mode 100644 index 0000000000000..66b0543d7a416 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/profile-symbol-list-compress.test @@ -0,0 +1,6 @@ +REQUIRES: zlib +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -prof-sym-list=%S/Inputs/profile-symbol-list-1.text %S/Inputs/sample-profile.proftext -o %t.1.output +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections -prof-sym-list=%S/Inputs/profile-symbol-list-2.text %S/Inputs/sample-profile.proftext -o %t.2.output +; RUN: llvm-profdata merge -sample -extbinary -compress-all-sections %t.1.output %t.2.output -o %t.3.output +; RUN: llvm-profdata show 
-sample -show-prof-sym-list %t.3.output > %t.4.output +; RUN: diff %S/Inputs/profile-symbol-list.expected %t.4.output diff --git a/llvm/test/tools/llvm-profdata/roundtrip-compress.test b/llvm/test/tools/llvm-profdata/roundtrip-compress.test new file mode 100644 index 0000000000000..7e495b6d95128 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/roundtrip-compress.test @@ -0,0 +1,10 @@ +REQUIRES: zlib +# Round trip from text --> compressed extbinary --> text +RUN: llvm-profdata merge --sample --extbinary -compress-all-sections -output=%t.1.profdata %S/Inputs/sample-profile.proftext +RUN: llvm-profdata merge --sample --text -output=%t.1.proftext %t.1.profdata +RUN: diff %t.1.proftext %S/Inputs/sample-profile.proftext +# Round trip from text --> binary --> compressed extbinary --> text +RUN: llvm-profdata merge --sample --binary -output=%t.2.profdata %S/Inputs/sample-profile.proftext +RUN: llvm-profdata merge --sample --extbinary -compress-all-sections -output=%t.3.profdata %t.2.profdata +RUN: llvm-profdata merge --sample --text -output=%t.2.proftext %t.3.profdata +RUN: diff %t.2.proftext %S/Inputs/sample-profile.proftext diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index e311c1069f722..1470442c38b61 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -439,12 +439,35 @@ static void populateProfileSymbolList(MemoryBuffer *Buffer, PSL.add(symbol); } +static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, + ProfileFormat OutputFormat, + MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &WriterList, + bool CompressAllSections) { + populateProfileSymbolList(Buffer, WriterList); + if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) + warn("Profile Symbol list is not empty but the output format is not " + "ExtBinary format. The list will be lost in the output. 
"); + + Writer.setProfileSymbolList(&WriterList); + + if (CompressAllSections) { + if (OutputFormat != PF_Ext_Binary) { + warn("-compress-all-section is ignored. Specify -extbinary to enable it"); + } else { + auto ExtBinaryWriter = + static_cast(&Writer); + ExtBinaryWriter->setToCompressAllSections(); + } + } +} + static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, StringRef ProfileSymbolListFile, - bool CompressProfSymList, FailureMode FailMode) { + bool CompressAllSections, FailureMode FailMode) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; @@ -496,17 +519,12 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, if (std::error_code EC = WriterOrErr.getError()) exitWithErrorCode(EC, OutputFilename); + auto Writer = std::move(WriterOrErr.get()); // WriterList will have StringRef refering to string in Buffer. // Make sure Buffer lives as long as WriterList. auto Buffer = getInputFileBuf(ProfileSymbolListFile); - populateProfileSymbolList(Buffer.get(), WriterList); - WriterList.setToCompress(CompressProfSymList); - if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) - warn("Profile Symbol list is not empty but the output format is not " - "ExtBinary format. The list will be lost in the output. "); - - auto Writer = std::move(WriterOrErr.get()); - Writer->setProfileSymbolList(&WriterList); + handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, + CompressAllSections); Writer->write(ProfileMap); } @@ -630,9 +648,10 @@ static int merge_main(int argc, const char *argv[]) { "prof-sym-list", cl::init(""), cl::desc("Path to file containing the list of function symbols " "used to populate profile symbol list")); - cl::opt CompressProfSymList( - "compress-prof-sym-list", cl::init(false), cl::Hidden, - cl::desc("Compress profile symbol list before write it into profile. 
")); + cl::opt CompressAllSections( + "compress-all-sections", cl::init(false), cl::Hidden, + cl::desc("Compress all sections when writing the profile (only " + "meaningful for -extbinary)")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -666,8 +685,8 @@ static int merge_main(int argc, const char *argv[]) { OutputFormat, OutputSparse, NumThreads, FailureMode); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat, ProfileSymbolListFile, - CompressProfSymList, FailureMode); + OutputFormat, ProfileSymbolListFile, CompressAllSections, + FailureMode); return 0; } From 9c2e12304388eef8b5ce5d1ac07afe52860045ca Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Oct 2019 16:15:20 +0000 Subject: [PATCH 119/254] [X86][SSE] getTargetShuffleInputs - move VT.isSimple/isVector checks inside. NFCI. Stop all the callers from having to check the value type before calling getTargetShuffleInputs. llvm-svn: 373915 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 915046048ff15..3806b0e233017 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7259,6 +7259,10 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, SmallVectorImpl &Mask, SelectionDAG &DAG, unsigned Depth, bool ResolveZero) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return false; + APInt KnownUndef, KnownZero; if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { for (int i = 0, e = Mask.size(); i != e; ++i) { @@ -7280,6 +7284,10 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, SmallVectorImpl &Mask, SelectionDAG &DAG, unsigned Depth = 0, bool ResolveZero = true) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return 
false; + unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, DAG, Depth, @@ -34574,8 +34582,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( // Get target/faux shuffle mask. SmallVector OpMask; SmallVector OpInputs; - if (!VT.isSimple() || !getTargetShuffleInputs(Op, DemandedElts, OpInputs, - OpMask, TLO.DAG, Depth, false)) + if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, TLO.DAG, + Depth, false)) return false; // Shuffle inputs must be the same size as the result. @@ -34954,8 +34962,7 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( SmallVector ShuffleMask; SmallVector ShuffleOps; - if (VT.isSimple() && VT.isVector() && - getTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) { + if (getTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) { // If all the demanded elts are from one operand and are inline, // then we can use the operand directly. int NumOps = ShuffleOps.size(); From d5b983555f7a853c4cd95c95b66715ae4976bd55 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Mon, 7 Oct 2019 16:20:22 +0000 Subject: [PATCH 120/254] [libTooling] Add `toString` method to the Stencil class Summary: `toString` generates a string representation of the stencil. Patch by Harshal T. Lehri. 
Reviewers: gribozavr Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68574 llvm-svn: 373916 --- .../clang/Tooling/Refactoring/Stencil.h | 21 +++++++ clang/lib/Tooling/Refactoring/Stencil.cpp | 51 +++++++++++++++++ clang/unittests/Tooling/StencilTest.cpp | 55 +++++++++++++++++++ 3 files changed, 127 insertions(+) diff --git a/clang/include/clang/Tooling/Refactoring/Stencil.h b/clang/include/clang/Tooling/Refactoring/Stencil.h index 96fd978e945a6..b80320d409b33 100644 --- a/clang/include/clang/Tooling/Refactoring/Stencil.h +++ b/clang/include/clang/Tooling/Refactoring/Stencil.h @@ -50,6 +50,11 @@ class StencilPartInterface { virtual bool isEqual(const StencilPartInterface &other) const = 0; + /// Constructs a string representation of the StencilPart. StencilParts + /// generated by the `selection` and `run` functions do not have a unique + /// string representation. + virtual std::string toString() const = 0; + const void *typeId() const { return TypeId; } protected: @@ -86,6 +91,12 @@ class StencilPart { return Impl->isEqual(*Other.Impl); } + std::string toString() const { + if (Impl == nullptr) + return ""; + return Impl->toString(); + } + private: std::shared_ptr Impl; }; @@ -120,6 +131,16 @@ class Stencil { return eval(Result); } + /// Constructs a string representation of the Stencil. The string is not + /// guaranteed to be unique. 
+ std::string toString() const { + std::vector PartStrings; + PartStrings.reserve(Parts.size()); + for (const auto &Part : Parts) + PartStrings.push_back(Part.toString()); + return llvm::join(PartStrings, ", "); + } + private: friend bool operator==(const Stencil &A, const Stencil &B); static StencilPart wrap(llvm::StringRef Text); diff --git a/clang/lib/Tooling/Refactoring/Stencil.cpp b/clang/lib/Tooling/Refactoring/Stencil.cpp index f7687f91e494a..ebfe78099db21 100644 --- a/clang/lib/Tooling/Refactoring/Stencil.cpp +++ b/clang/lib/Tooling/Refactoring/Stencil.cpp @@ -15,6 +15,7 @@ #include "clang/Lex/Lexer.h" #include "clang/Tooling/Refactoring/SourceCode.h" #include "clang/Tooling/Refactoring/SourceCodeBuilders.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Errc.h" #include #include @@ -128,6 +129,54 @@ bool isEqualData(const MatchConsumer &A, return false; } +std::string toStringData(const RawTextData &Data) { + std::string Result; + llvm::raw_string_ostream OS(Result); + OS << "\""; + OS.write_escaped(Data.Text); + OS << "\""; + OS.flush(); + return Result; +} + +std::string toStringData(const DebugPrintNodeData &Data) { + return (llvm::Twine("dPrint(\"") + Data.Id + "\")").str(); +} + +std::string toStringData(const UnaryOperationData &Data) { + StringRef OpName; + switch (Data.Op) { + case UnaryNodeOperator::Parens: + OpName = "expression"; + break; + case UnaryNodeOperator::Deref: + OpName = "deref"; + break; + case UnaryNodeOperator::Address: + OpName = "addressOf"; + break; + } + return (OpName + "(\"" + Data.Id + "\")").str(); +} + +std::string toStringData(const SelectorData &) { return "SelectorData()"; } + +std::string toStringData(const AccessData &Data) { + return (llvm::Twine("access(\"") + Data.BaseId + "\", " + + Data.Member.toString() + ")") + .str(); +} + +std::string toStringData(const IfBoundData &Data) { + return (llvm::Twine("ifBound(\"") + Data.Id + "\", " + + Data.TruePart.toString() + ", " + Data.FalsePart.toString() + ")") + 
.str(); +} + +std::string toStringData(const MatchConsumer &) { + return "MatchConsumer()"; +} + // The `evalData()` overloads evaluate the given stencil data to a string, given // the match result, and append it to `Result`. We define an overload for each // type of stencil data. @@ -247,6 +296,8 @@ class StencilPartImpl : public StencilPartInterface { return isEqualData(Data, OtherPtr->Data); return false; } + + std::string toString() const override { return toStringData(Data); } }; } // namespace diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp index f441fae803228..877193bf70a1c 100644 --- a/clang/unittests/Tooling/StencilTest.cpp +++ b/clang/unittests/Tooling/StencilTest.cpp @@ -389,4 +389,59 @@ TEST(StencilEqualityTest, InEqualityRun) { auto S2 = cat(run(F)); EXPECT_NE(S1, S2); } + +TEST(StencilToStringTest, RawTextOp) { + auto S = cat("foo bar baz"); + EXPECT_EQ(S.toString(), R"("foo bar baz")"); +} + +TEST(StencilToStringTest, RawTextOpEscaping) { + auto S = cat("foo \"bar\" baz\\n"); + EXPECT_EQ(S.toString(), R"("foo \"bar\" baz\\n")"); +} + +TEST(StencilToStringTest, DebugPrintNodeOp) { + auto S = cat(dPrint("Id")); + EXPECT_EQ(S.toString(), R"repr(dPrint("Id"))repr"); +} + +TEST(StencilToStringTest, ExpressionOp) { + auto S = cat(expression("Id")); + EXPECT_EQ(S.toString(), R"repr(expression("Id"))repr"); +} + +TEST(StencilToStringTest, DerefOp) { + auto S = cat(deref("Id")); + EXPECT_EQ(S.toString(), R"repr(deref("Id"))repr"); +} + +TEST(StencilToStringTest, AddressOfOp) { + auto S = cat(addressOf("Id")); + EXPECT_EQ(S.toString(), R"repr(addressOf("Id"))repr"); +} + +TEST(StencilToStringTest, AccessOp) { + auto S = cat(access("Id", text("memberData"))); + EXPECT_EQ(S.toString(), R"repr(access("Id", "memberData"))repr"); +} + +TEST(StencilToStringTest, AccessOpStencilPart) { + auto S = cat(access("Id", access("subId", "memberData"))); + EXPECT_EQ(S.toString(), + R"repr(access("Id", access("subId", 
"memberData")))repr"); +} + +TEST(StencilToStringTest, IfBoundOp) { + auto S = cat(ifBound("Id", text("trueText"), access("exprId", "memberData"))); + EXPECT_EQ( + S.toString(), + R"repr(ifBound("Id", "trueText", access("exprId", "memberData")))repr"); +} + +TEST(StencilToStringTest, MultipleOp) { + auto S = cat("foo", access("x", "m()"), "bar", + ifBound("x", text("t"), access("e", "f"))); + EXPECT_EQ(S.toString(), R"repr("foo", access("x", "m()"), "bar", )repr" + R"repr(ifBound("x", "t", access("e", "f")))repr"); +} } // namespace From b3342e180e9c2cc59580796d3c4b88a1b7d65434 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Mon, 7 Oct 2019 16:30:31 +0000 Subject: [PATCH 121/254] [llvm-profdata] Minor format fix Summary: Minor format fix for output of "llvm-profdata -show" Reviewers: wmi Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68440 llvm-svn: 373917 --- llvm/lib/ProfileData/SampleProf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 2150c5d5a70bb..003e8d4d42969 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -155,6 +155,7 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { FS.second.print(OS, Indent + 4); } } + OS.indent(Indent); OS << "}\n"; } else { OS << "No inlined callsites in this function\n"; From 7e38f0c408058ec104df4f92b9d0789ab07f21e8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Oct 2019 16:42:25 +0000 Subject: [PATCH 122/254] Codegen - silence static analyzer getAs<> null dereference warnings. NFCI. The static analyzer is warning about potential null dereferences, but in these cases we should be able to use castAs<> directly and if not assert will fire for us. 
llvm-svn: 373918 --- clang/lib/CodeGen/CGAtomic.cpp | 2 +- clang/lib/CodeGen/CGCXX.cpp | 4 ++-- clang/lib/CodeGen/CGClass.cpp | 2 +- clang/lib/CodeGen/CGDebugInfo.cpp | 4 ++-- clang/lib/CodeGen/CGExpr.cpp | 6 +++--- clang/lib/CodeGen/CGObjCGNU.cpp | 2 +- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 2 +- clang/lib/CodeGen/TargetInfo.cpp | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index afddfb76605cd..5059163507503 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -970,7 +970,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) { if (!E->isOpenCL()) return V; - auto AS = PT->getAs()->getPointeeType().getAddressSpace(); + auto AS = PT->castAs()->getPointeeType().getAddressSpace(); if (AS == LangAS::opencl_generic) return V; auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index a39bf0f6de2f8..7e5fe0fd6b1d5 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -104,8 +104,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Give up if the calling conventions don't match. We could update the call, // but it is probably not worth it. 
const CXXDestructorDecl *BaseD = UniqueBase->getDestructor(); - if (BaseD->getType()->getAs()->getCallConv() != - D->getType()->getAs()->getCallConv()) + if (BaseD->getType()->castAs()->getCallConv() != + D->getType()->castAs()->getCallConv()) return true; GlobalDecl AliasDecl(D, Dtor_Base); diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index f228329ad9b6a..ab8588303d29f 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -739,7 +739,7 @@ bool CodeGenFunction::IsConstructorDelegationValid( // We also disable the optimization for variadic functions because // it's impossible to "re-pass" varargs. - if (Ctor->getType()->getAs()->isVariadic()) + if (Ctor->getType()->castAs()->isVariadic()) return false; // FIXME: Decide if we can do a delegation of a delegating constructor. diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ca91618f1627d..be542bc7a63d6 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1659,7 +1659,7 @@ void CGDebugInfo::CollectCXXMemberFunctions( if (!Method || Method->isImplicit() || Method->hasAttr()) continue; - if (Method->getType()->getAs()->getContainedAutoType()) + if (Method->getType()->castAs()->getContainedAutoType()) continue; // Reuse the existing member function declaration if it exists. 
@@ -4561,7 +4561,7 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { // return type in the definition) if (const auto *FD = dyn_cast(USD.getUnderlyingDecl())) if (const auto *AT = - FD->getType()->getAs()->getContainedAutoType()) + FD->getType()->castAs()->getContainedAutoType()) if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 776d21f4ea968..1ea02677b4407 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -997,7 +997,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, // Add the inc/dec to the real part. NextVal = Builder.CreateAdd(InVal.first, NextVal, isInc ? "inc" : "dec"); } else { - QualType ElemTy = E->getType()->getAs()->getElementType(); + QualType ElemTy = E->getType()->castAs()->getElementType(); llvm::APFloat FVal(getContext().getFloatTypeSemantics(ElemTy), 1); if (!isInc) FVal.changeSign(); @@ -2194,7 +2194,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // If ivar is a structure pointer, assigning to field of // this struct follows gcc's behavior and makes it a non-ivar // writer-barrier conservatively. - ExpTy = ExpTy->getAs()->getPointeeType(); + ExpTy = ExpTy->castAs()->getPointeeType(); if (ExpTy->isRecordType()) { LV.setObjCIvar(false); return; @@ -2230,7 +2230,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // a non-ivar write-barrier. 
QualType ExpTy = E->getType(); if (ExpTy->isPointerType()) - ExpTy = ExpTy->getAs()->getPointeeType(); + ExpTy = ExpTy->castAs()->getPointeeType(); if (ExpTy->isRecordType()) LV.setObjCIvar(false); } diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index c21f257442c98..d2c089d0360e1 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -4040,7 +4040,7 @@ LValue CGObjCGNU::EmitObjCValueForIvar(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = - ObjectTy->getAs()->getInterface(); + ObjectTy->castAs()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 8a5ea844c21fc..2d8b538bc2eec 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1210,7 +1210,7 @@ static bool hasDefaultCXXMethodCC(ASTContext &Context, CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); CallingConv ActualCallingConv = - MD->getType()->getAs()->getCallConv(); + MD->getType()->castAs()->getCallConv(); return ExpectedCallingConv == ActualCallingConv; } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 497f3f7481288..b281db332143d 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9532,7 +9532,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, // Complex types for the hard float ABI must be passed direct rather than // using CoerceAndExpand. 
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { - QualType EltTy = Ty->getAs()->getElementType(); + QualType EltTy = Ty->castAs()->getElementType(); if (getContext().getTypeSize(EltTy) <= FLen) { ArgFPRsLeft -= 2; return ABIArgInfo::getDirect(); From 283df8cf742803ad0946dbb2b4ff1e9d1eaed7aa Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Mon, 7 Oct 2019 16:45:47 +0000 Subject: [PATCH 123/254] Fix build errors caused by rL373914. llvm-svn: 373919 --- llvm/include/llvm/ProfileData/SampleProfWriter.h | 8 ++++---- llvm/lib/ProfileData/SampleProfReader.cpp | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index ae7ef2deaf059..ce60baf66c65c 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -202,10 +202,10 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { private: virtual void initSectionLayout() override { - SectionLayout = {{SecProfSummary}, - {SecNameTable}, - {SecLBRProfile}, - {SecProfileSymbolList}}; + SectionLayout = {{SecProfSummary, 0, 0, 0}, + {SecNameTable, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0}}; }; virtual std::error_code writeSections(const StringMap &ProfileMap) override; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 075bd9412b0c9..6d00404b0bb22 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -530,8 +530,9 @@ std::error_code SampleProfileReaderExtBinaryBase::decompressSection( StringRef CompressedStrings(reinterpret_cast(Data), *CompressSize); char *Buffer = Allocator.Allocate(DecompressBufSize); + size_t UCSize = DecompressBufSize; llvm::Error E = - zlib::uncompress(CompressedStrings, Buffer, DecompressBufSize); + zlib::uncompress(CompressedStrings, Buffer, UCSize); if (E) 
return sampleprof_error::uncompress_failed; DecompressBuf = reinterpret_cast(Buffer); From a65cfe3037a9555b0f85e76cf540d57fdcaac266 Mon Sep 17 00:00:00 2001 From: Paul Hoad Date: Mon, 7 Oct 2019 16:53:35 +0000 Subject: [PATCH 124/254] [clang-format] [NFC] Ensure clang-format is itself clang-formatted. Summary: Before making a proposed change, ensure ClangFormat.cpp is fully clang-formatted, no functional change just clang-formatting using the in tree .clang-format. Reviewers: mitchell-stellar Reviewed By: mitchell-stellar Subscribers: Eugene.Zelenko, cfe-commits Tags: #clang-format, #clang Differential Revision: https://reviews.llvm.org/D68551 llvm-svn: 373921 --- clang/tools/clang-format/ClangFormat.cpp | 71 ++++++++++++------------ 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index c0c8dc0e2c7ba..f2d57ee2832dc 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -51,13 +51,14 @@ static cl::list "Can only be used with one input file."), cl::cat(ClangFormatCategory)); static cl::list -LineRanges("lines", cl::desc(": - format a range of\n" - "lines (both 1-based).\n" - "Multiple ranges can be formatted by specifying\n" - "several -lines arguments.\n" - "Can't be used with -offset and -length.\n" - "Can only be used with one input file."), - cl::cat(ClangFormatCategory)); + LineRanges("lines", + cl::desc(": - format a range of\n" + "lines (both 1-based).\n" + "Multiple ranges can be formatted by specifying\n" + "several -lines arguments.\n" + "Can't be used with -offset and -length.\n" + "Can only be used with one input file."), + cl::cat(ClangFormatCategory)); static cl::opt Style("style", cl::desc(clang::format::StyleOptionHelpDescription), cl::init(clang::format::DefaultFormatStyle), @@ -72,12 +73,12 @@ static cl::opt cl::init(clang::format::DefaultFallbackStyle), cl::cat(ClangFormatCategory)); -static cl::opt 
-AssumeFileName("assume-filename", - cl::desc("When reading from stdin, clang-format assumes this\n" - "filename to look for a style config file (with\n" - "-style=file) and to determine the language."), - cl::init(""), cl::cat(ClangFormatCategory)); +static cl::opt AssumeFileName( + "assume-filename", + cl::desc("When reading from stdin, clang-format assumes this\n" + "filename to look for a style config file (with\n" + "-style=file) and to determine the language."), + cl::init(""), cl::cat(ClangFormatCategory)); static cl::opt Inplace("i", cl::desc("Inplace edit s, if specified."), @@ -249,8 +250,8 @@ static bool format(StringRef FileName) { // On Windows, overwriting a file with an open file mapping doesn't work, // so read the whole file into memory when formatting in-place. ErrorOr> CodeOrErr = - !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) : - MemoryBuffer::getFileOrSTDIN(FileName); + !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) + : MemoryBuffer::getFileOrSTDIN(FileName); if (std::error_code EC = CodeOrErr.getError()) { errs() << EC.message() << "\n"; return true; @@ -264,20 +265,21 @@ static bool format(StringRef FileName) { // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding // for more information. 
StringRef BufStr = Code->getBuffer(); - const char *InvalidBOM = llvm::StringSwitch(BufStr) - .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), - "UTF-32 (BE)") - .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), - "UTF-32 (LE)") - .StartsWith("\xFE\xFF", "UTF-16 (BE)") - .StartsWith("\xFF\xFE", "UTF-16 (LE)") - .StartsWith("\x2B\x2F\x76", "UTF-7") - .StartsWith("\xF7\x64\x4C", "UTF-1") - .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") - .StartsWith("\x0E\xFE\xFF", "SCSU") - .StartsWith("\xFB\xEE\x28", "BOCU-1") - .StartsWith("\x84\x31\x95\x33", "GB-18030") - .Default(nullptr); + const char *InvalidBOM = + llvm::StringSwitch(BufStr) + .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), + "UTF-32 (BE)") + .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), + "UTF-32 (LE)") + .StartsWith("\xFE\xFF", "UTF-16 (BE)") + .StartsWith("\xFF\xFE", "UTF-16 (LE)") + .StartsWith("\x2B\x2F\x76", "UTF-7") + .StartsWith("\xF7\x64\x4C", "UTF-1") + .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") + .StartsWith("\x0E\xFE\xFF", "SCSU") + .StartsWith("\xFB\xEE\x28", "BOCU-1") + .StartsWith("\x84\x31\x95\x33", "GB-18030") + .Default(nullptr); if (InvalidBOM) { errs() << "error: encoding with unsupported byte order mark \"" @@ -313,8 +315,8 @@ static bool format(StringRef FileName) { // Get new affected ranges after sorting `#includes`. 
Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges); FormattingAttemptStatus Status; - Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges, - AssumedFileName, &Status); + Replacements FormatChanges = + reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status); Replaces = Replaces.merge(FormatChanges); if (OutputXML) { outs() << "\n Date: Mon, 7 Oct 2019 17:03:44 +0000 Subject: [PATCH 125/254] [clang-format] [PR27004] omits leading space for noexcept when formatting operator delete() Summary: clang-format is incorrectly thinking the parameter parens are part of a cast operation, this is resulting in there sometimes being not space between the paren and the noexcept (and other keywords like volatile etc..) ``` void operator++(int) noexcept; void operator++(int &) noexcept; void operator delete(void *, std::size_t, const std::nothrow_t &)noexcept; ``` Reviewers: klimek, owenpan, mitchell-stellar Reviewed By: mitchell-stellar Subscribers: cfe-commits Tags: #clang-format, #clang Differential Revision: https://reviews.llvm.org/D68481 llvm-svn: 373922 --- clang/lib/Format/TokenAnnotator.cpp | 7 +++++++ clang/unittests/Format/FormatTest.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 2cc69108b2f9e..d4519fe9c4a39 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1611,6 +1611,13 @@ class AnnotatingParser { if (Tok.Next->is(tok::question)) return false; + // Functions which end with decorations like volatile, noexcept are unlikely + // to be casts. + if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const, + tok::kw_throw, tok::l_square, tok::arrow, + Keywords.kw_override, Keywords.kw_final)) + return false; + // As Java has no function types, a "(" after the ")" likely means that this // is a cast. 
if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 8263dc41b315f..8c2bc1ab22839 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -14678,6 +14678,33 @@ TEST_F(FormatTest, AlternativeOperators) { */ } +TEST_F(FormatTest, NotCastRPaen) { + + verifyFormat("void operator++(int) noexcept;"); + verifyFormat("void operator++(int &) noexcept;"); + verifyFormat("void operator delete(void *, std::size_t, const std::nothrow_t " + "&) noexcept;"); + verifyFormat( + "void operator delete(std::size_t, const std::nothrow_t &) noexcept;"); + verifyFormat("void operator delete(const std::nothrow_t &) noexcept;"); + verifyFormat("void operator delete(std::nothrow_t &) noexcept;"); + verifyFormat("void operator delete(nothrow_t &) noexcept;"); + verifyFormat("void operator delete(foo &) noexcept;"); + verifyFormat("void operator delete(foo) noexcept;"); + verifyFormat("void operator delete(int) noexcept;"); + verifyFormat("void operator delete(int &) noexcept;"); + verifyFormat("void operator delete(int &) volatile noexcept;"); + verifyFormat("void operator delete(int &) const"); + verifyFormat("void operator delete(int &) = default"); + verifyFormat("void operator delete(int &) = delete"); + verifyFormat("void operator delete(int &) [[noreturn]]"); + verifyFormat("void operator delete(int &) throw();"); + verifyFormat("void operator delete(int &) throw(int);"); + verifyFormat("auto operator delete(int &) -> int;"); + verifyFormat("auto operator delete(int &) override"); + verifyFormat("auto operator delete(int &) final"); +} + } // end namespace } // end namespace format } // end namespace clang From 90b7dc9e716450ec8ce3d64cccbb6acdb750a609 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 7 Oct 2019 17:05:09 +0000 Subject: [PATCH 126/254] [Remarks] Pass StringBlockValue as StringRef. 
After changing the remark serialization, we now pass StringRefs to the serializer. We should use StringRef for StringBlockVal, to avoid creating temporary objects, which then cause StringBlockVal.Value to point to invalid memory. Reviewers: thegameg, anemet Reviewed By: thegameg Differential Revision: https://reviews.llvm.org/D68571 llvm-svn: 373923 --- llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 66eb06bbc4f5d..3a42fe0678eb1 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -103,7 +103,7 @@ template <> struct MappingTraits { /// newlines in strings. struct StringBlockVal { StringRef Value; - StringBlockVal(const std::string &Value) : Value(Value) {} + StringBlockVal(StringRef R) : Value(R) {} }; template <> struct BlockScalarTraits { From de8599776b52b7d130fbe8373827f6a6a7c9f97d Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 7 Oct 2019 17:12:18 +0000 Subject: [PATCH 127/254] [clangd] Fix raciness in code completion tests Reviewers: sammccall, ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68273 llvm-svn: 373924 --- .../clangd/unittests/CodeCompleteTests.cpp | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 4cfb1a2d63195..68d0a46760985 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -18,6 +18,7 @@ #include "TestFS.h" #include "TestIndex.h" #include "TestTU.h" +#include "Threading.h" #include "index/Index.h" #include "index/MemIndex.h" #include "clang/Sema/CodeCompleteConsumer.h" @@ -27,6 +28,8 @@ #include 
"llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include +#include namespace clang { namespace clangd { @@ -1112,8 +1115,9 @@ class IndexRequestCollector : public SymbolIndex { bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref Callback) const override { - std::lock_guard Lock(Mut); + std::unique_lock Lock(Mut); Requests.push_back(Req); + ReceivedRequestCV.notify_one(); return true; } @@ -1131,8 +1135,10 @@ class IndexRequestCollector : public SymbolIndex { // isn't used in production code. size_t estimateMemoryUsage() const override { return 0; } - const std::vector consumeRequests() const { - std::lock_guard Lock(Mut); + const std::vector consumeRequests(size_t Num) const { + std::unique_lock Lock(Mut); + EXPECT_TRUE(wait(Lock, ReceivedRequestCV, timeoutSeconds(10), + [this, Num] { return Requests.size() == Num; })); auto Reqs = std::move(Requests); Requests = {}; return Reqs; @@ -1140,16 +1146,21 @@ class IndexRequestCollector : public SymbolIndex { private: // We need a mutex to handle async fuzzy find requests. + mutable std::condition_variable ReceivedRequestCV; mutable std::mutex Mut; mutable std::vector Requests; }; -std::vector captureIndexRequests(llvm::StringRef Code) { +// Clients have to consume exactly Num requests. +std::vector captureIndexRequests(llvm::StringRef Code, + size_t Num = 1) { clangd::CodeCompleteOptions Opts; IndexRequestCollector Requests; Opts.Index = &Requests; completions(Code, {}, Opts); - return Requests.consumeRequests(); + const auto Reqs = Requests.consumeRequests(Num); + EXPECT_EQ(Reqs.size(), Num); + return Reqs; } TEST(CompletionTest, UnqualifiedIdQuery) { @@ -2098,18 +2109,15 @@ TEST(CompletionTest, EnableSpeculativeIndexRequest) { auto CompleteAtPoint = [&](StringRef P) { cantFail(runCodeComplete(Server, File, Test.point(P), Opts)); - // Sleep for a while to make sure asynchronous call (if applicable) is also - // triggered before callback is invoked. 
- std::this_thread::sleep_for(std::chrono::milliseconds(100)); }; CompleteAtPoint("1"); - auto Reqs1 = Requests.consumeRequests(); + auto Reqs1 = Requests.consumeRequests(1); ASSERT_EQ(Reqs1.size(), 1u); EXPECT_THAT(Reqs1[0].Scopes, UnorderedElementsAre("ns1::")); CompleteAtPoint("2"); - auto Reqs2 = Requests.consumeRequests(); + auto Reqs2 = Requests.consumeRequests(1); // Speculation succeeded. Used speculative index result. ASSERT_EQ(Reqs2.size(), 1u); EXPECT_EQ(Reqs2[0], Reqs1[0]); @@ -2117,7 +2125,7 @@ TEST(CompletionTest, EnableSpeculativeIndexRequest) { CompleteAtPoint("3"); // Speculation failed. Sent speculative index request and the new index // request after sema. - auto Reqs3 = Requests.consumeRequests(); + auto Reqs3 = Requests.consumeRequests(2); ASSERT_EQ(Reqs3.size(), 2u); } From f7bd5bffede5c97f92caaa5804f1768c0c7bbbc4 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 7 Oct 2019 17:17:53 +0000 Subject: [PATCH 128/254] ProcessInstanceInfoMatch: Don't match processes with no name if a name match was requested Since D68289, a couple of tests on linux started being extremely flaky. All of them were doing name-based attaching and were failing because they couldn't find an unambiguous process to attach to. The patch above changed the process finding logic, so that failure to find a process name does not constitute an error. This meant that a lot more transient processes showed up in the process list during the test suite run. Previously, these processes would not appear as they would be gone by the time we went to read their executable name, arguments, etc. Now, this alone should not cause an issue were it not for the fact that we were considering a process with no name as if it matched by default (even if we were explicitly searching for a process with a specified name). This meant that any of the "transient" processes with no name would make the name match ambiguous. That clearly seems like a bug to me so I fix that. 
llvm-svn: 373925 --- lldb/source/Utility/ProcessInfo.cpp | 2 +- .../Utility/ProcessInstanceInfoTest.cpp | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lldb/source/Utility/ProcessInfo.cpp b/lldb/source/Utility/ProcessInfo.cpp index 832e5efae29c8..6be47d377a2af 100644 --- a/lldb/source/Utility/ProcessInfo.cpp +++ b/lldb/source/Utility/ProcessInfo.cpp @@ -244,7 +244,7 @@ void ProcessInstanceInfo::DumpAsTableRow(Stream &s, UserIDResolver &resolver, } bool ProcessInstanceInfoMatch::NameMatches(const char *process_name) const { - if (m_name_match_type == NameMatch::Ignore || process_name == nullptr) + if (m_name_match_type == NameMatch::Ignore) return true; const char *match_name = m_match_info.GetName(); if (!match_name) diff --git a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp index 73978836c5be5..1d363ac80a365 100644 --- a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp +++ b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp @@ -91,3 +91,20 @@ TEST(ProcessInstanceInfo, DumpTable_invalidUID) { )", s.GetData()); } + +TEST(ProcessInstanceInfoMatch, Name) { + ProcessInstanceInfo info_bar, info_empty; + info_bar.GetExecutableFile().SetFile("/foo/bar", FileSpec::Style::posix); + + ProcessInstanceInfoMatch match; + match.SetNameMatchType(NameMatch::Equals); + match.GetProcessInfo().GetExecutableFile().SetFile("bar", + FileSpec::Style::posix); + + EXPECT_TRUE(match.Matches(info_bar)); + EXPECT_FALSE(match.Matches(info_empty)); + + match.GetProcessInfo().GetExecutableFile() = FileSpec(); + EXPECT_TRUE(match.Matches(info_bar)); + EXPECT_TRUE(match.Matches(info_empty)); +} From fccfe2c04abf17f9e8860bbc6aefd2090711c92e Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 7 Oct 2019 17:22:50 +0000 Subject: [PATCH 129/254] [DWARFASTParserClang] Delete commented-out typedef, NFC (& group together all the protected members & typedefs) llvm-svn: 373926 --- 
.../SymbolFile/DWARF/DWARFASTParserClang.h | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index a8daa10596e57..c0c32c5bba6cf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -63,9 +63,28 @@ class DWARFASTParserClang : public DWARFASTParser { lldb_private::ClangASTImporter &GetClangASTImporter(); protected: + /// Protected typedefs and members. + /// @{ class DelayedAddObjCClassProperty; typedef std::vector DelayedPropertyList; + typedef llvm::SmallPtrSet DIEPointerSet; + typedef llvm::DenseMap + DIEToDeclContextMap; + typedef std::multimap + DeclContextToDIEMap; + typedef llvm::DenseMap + DIEToDeclMap; + typedef llvm::DenseMap DeclToDIEMap; + + lldb_private::ClangASTContext &m_ast; + DIEToDeclMap m_die_to_decl; + DeclToDIEMap m_decl_to_die; + DIEToDeclContextMap m_die_to_decl_ctx; + DeclContextToDIEMap m_decl_ctx_to_die; + std::unique_ptr m_clang_ast_importer_up; + /// @} + clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); @@ -128,24 +147,6 @@ class DWARFASTParserClang : public DWARFASTParser { // Return true if this type is a declaration to a type in an external // module. 
lldb::ModuleSP GetModuleForType(const DWARFDIE &die); - - typedef llvm::SmallPtrSet DIEPointerSet; - typedef llvm::DenseMap - DIEToDeclContextMap; - // typedef llvm::DenseMap - // DeclContextToDIEMap; - typedef std::multimap - DeclContextToDIEMap; - typedef llvm::DenseMap - DIEToDeclMap; - typedef llvm::DenseMap DeclToDIEMap; - - lldb_private::ClangASTContext &m_ast; - DIEToDeclMap m_die_to_decl; - DeclToDIEMap m_decl_to_die; - DIEToDeclContextMap m_die_to_decl_ctx; - DeclContextToDIEMap m_decl_ctx_to_die; - std::unique_ptr m_clang_ast_importer_up; }; #endif // SymbolFileDWARF_DWARFASTParserClang_h_ From 40a1853c497d9cbedd7ce8b7edbf3ebb31f826bf Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 7 Oct 2019 17:22:53 +0000 Subject: [PATCH 130/254] [DWARFASTParserClang] Factor out structure-like type parsing, NFC Split out the logic to parse structure-like types into a separate function, in an attempt to reduce the complexity of ParseTypeFromDWARF. Inspired by discussion in https://reviews.llvm.org/D68130. 
Differential Revision: https://reviews.llvm.org/D68422 llvm-svn: 373927 --- .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 1 - .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 849 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 52 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 3 +- 4 files changed, 473 insertions(+), 432 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 4f6885e611071..7ee4727cde914 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -28,7 +28,6 @@ class DWARFASTParser { virtual lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, const DWARFDIE &die, - lldb_private::Log *log, bool *type_is_new_ptr) = 0; virtual lldb_private::Function * diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index e031618a11874..607a16c59fb18 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -9,7 +9,6 @@ #include #include "DWARFASTParserClang.h" -#include "DWARFDIE.h" #include "DWARFDebugInfo.h" #include "DWARFDeclContext.h" #include "DWARFDefines.h" @@ -232,42 +231,7 @@ static void CompleteExternalTagDeclType(ClangASTImporter &ast_importer, } } -namespace { -/// Parsed form of all attributes that are relevant for type reconstruction. 
-/// Some attributes are relevant for all kinds of types (declaration), while -/// others are only meaningful to a specific type (is_virtual) -struct ParsedTypeAttributes { - explicit ParsedTypeAttributes(const DWARFDIE &die); - - AccessType accessibility = eAccessNone; - bool is_artificial = false; - bool is_complete_objc_class = false; - bool is_explicit = false; - bool is_forward_declaration = false; - bool is_inline = false; - bool is_scoped_enum = false; - bool is_vector = false; - bool is_virtual = false; - clang::StorageClass storage = clang::SC_None; - const char *mangled_name = nullptr; - ConstString name; - Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; - LanguageType class_language = eLanguageTypeUnknown; - llvm::Optional byte_size; - size_t calling_convention = llvm::dwarf::DW_CC_normal; - uint32_t bit_stride = 0; - uint32_t byte_stride = 0; - uint32_t encoding = 0; -}; -} // namespace - -ParsedTypeAttributes::ParsedTypeAttributes(const DWARFDIE &die) { +ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { DWARFAttributes attributes; size_t num_attributes = die.GetAttributes(attributes); for (size_t i = 0; i < num_attributes; ++i) { @@ -394,13 +358,17 @@ static std::string GetUnitName(const DWARFDIE &die) { } TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, - const DWARFDIE &die, Log *log, + const DWARFDIE &die, bool *type_is_new_ptr) { if (type_is_new_ptr) *type_is_new_ptr = false; if (!die) return nullptr; + + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); + SymbolFileDWARF *dwarf = die.GetDWARF(); if (log) { DWARFDIE context_die; @@ -424,11 +392,11 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, // Set a bit that lets us know that we are currently parsing this 
dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED; - ParsedTypeAttributes attrs(die); + ParsedDWARFTypeAttributes attrs(die); if (DWARFDIE signature_die = attrs.signature.Reference()) { if (TypeSP type_sp = - ParseTypeFromDWARF(sc, signature_die, log, type_is_new_ptr)) { + ParseTypeFromDWARF(sc, signature_die, type_is_new_ptr)) { dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); if (clang::DeclContext *decl_ctx = GetCachedClangDeclContextForDIE(signature_die)) @@ -438,7 +406,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return nullptr; } - TypeList &type_list = dwarf->GetTypeList(); if (type_is_new_ptr) *type_is_new_ptr = true; @@ -566,7 +533,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, bool function_type_is_new_pointer; TypeSP lldb_function_type_sp = ParseTypeFromDWARF( - sc, function_type, log, &function_type_is_new_pointer); + sc, function_type, &function_type_is_new_pointer); if (lldb_function_type_sp) { clang_type = m_ast.CreateBlockPointerType( @@ -664,372 +631,11 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_structure_type: case DW_TAG_union_type: case DW_TAG_class_type: { - // UniqueDWARFASTType is large, so don't create a local variables on - // the stack, put it on the heap. This function is often called - // recursively and clang isn't good and sharing the stack space for - // variables in different blocks. - std::unique_ptr unique_ast_entry_up( - new UniqueDWARFASTType()); - - ConstString unique_typename(attrs.name); - Declaration unique_decl(attrs.decl); - - if (attrs.name) { - if (Language::LanguageIsCPlusPlus(cu_language)) { - // For C++, we rely solely upon the one definition rule that says - // only one thing can exist at a given decl context. We ignore the - // file and line that things are declared on. 
- std::string qualified_name; - if (die.GetQualifiedName(qualified_name)) - unique_typename = ConstString(qualified_name); - unique_decl.Clear(); - } - - if (dwarf->GetUniqueDWARFASTTypeMap().Find( - unique_typename, die, unique_decl, attrs.byte_size.getValueOr(-1), - *unique_ast_entry_up)) { - type_sp = unique_ast_entry_up->m_type_sp; - if (type_sp) { - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - LinkDeclContextToDIE( - GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die); - return type_sp; - } - } - } - - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); - - int tag_decl_kind = -1; - AccessType default_accessibility = eAccessNone; - if (tag == DW_TAG_structure_type) { - tag_decl_kind = clang::TTK_Struct; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_union_type) { - tag_decl_kind = clang::TTK_Union; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_class_type) { - tag_decl_kind = clang::TTK_Class; - default_accessibility = eAccessPrivate; - } - - if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name && - !die.HasChildren() && cu_language == eLanguageTypeObjC) { - // Work around an issue with clang at the moment where forward - // declarations for objective C classes are emitted as: - // DW_TAG_structure_type [2] - // DW_AT_name( "ForwardObjcClass" ) - // DW_AT_byte_size( 0x00 ) - // DW_AT_decl_file( "..." ) - // DW_AT_decl_line( 1 ) - // - // Note that there is no DW_AT_declaration and there are no children, - // and the byte size is zero. 
- attrs.is_forward_declaration = true; - } - - if (attrs.class_language == eLanguageTypeObjC || - attrs.class_language == eLanguageTypeObjC_plus_plus) { - if (!attrs.is_complete_objc_class && - die.Supports_DW_AT_APPLE_objc_complete_type()) { - // We have a valid eSymbolTypeObjCClass class symbol whose name - // matches the current objective C class that we are trying to find - // and this DIE isn't the complete definition (we checked - // is_complete_objc_class above and know it is false), so the real - // definition is in here somewhere - type_sp = - dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, - // see if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( - die, attrs.name, true); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is an " - "incomplete objc type, complete type is 0x%8.8" PRIx64, - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this - // die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - return type_sp; - } - } - } - - if (attrs.is_forward_declaration) { - // We have a forward declaration to a type and we need to try and - // find a full declaration. We look in the current type index just in - // case we have a forward declaration followed by an actual - // declarations in the DWARF. If this fails, we need to look - // elsewhere... 
- if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " - "forward declaration, trying to find complete type", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString()); - } - - // See if the type comes from a DWO module and if so, track down that - // type. - type_sp = ParseTypeFromDWO(die, log); - if (type_sp) - return type_sp; - - DWARFDeclContext die_decl_ctx; - die.GetDWARFDeclContext(die_decl_ctx); - - // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die, - // type_name_const_str); - type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, see - // if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( - die_decl_ctx); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " - "forward declaration, complete type is 0x%8.8" PRIx64, - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - clang::DeclContext *defn_decl_ctx = - GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); - if (defn_decl_ctx) - LinkDeclContextToDIE(defn_decl_ctx, die); - return type_sp; - } - } - assert(tag_decl_kind != -1); - bool clang_type_was_created = false; - clang_type.SetCompilerType( - &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); - if (!clang_type) { - clang::DeclContext *decl_ctx = - GetClangDeclContextContainingDIE(die, nullptr); - - // 
If your decl context is a record that was imported from another - // AST context (in the gmodules case), we need to make sure the type - // backing the Decl is complete before adding children to it. This is - // not an issue in the non-gmodules case because the debug info will - // always contain a full definition of parent types in that case. - CompleteExternalTagDeclType(GetClangASTImporter(), decl_ctx, die, - attrs.name.GetCString()); - - if (attrs.accessibility == eAccessNone && decl_ctx) { - // Check the decl context that contains this class/struct/union. If - // it is a class we must give it an accessibility. - const clang::Decl::Kind containing_decl_kind = decl_ctx->getDeclKind(); - if (DeclKindIsCXXClass(containing_decl_kind)) - attrs.accessibility = default_accessibility; - } - - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); - metadata.SetIsDynamicCXXType(dwarf->ClassOrStructIsVirtual(die)); - - if (attrs.name.GetStringRef().contains('<')) { - ClangASTContext::TemplateParameterInfos template_param_infos; - if (ParseTemplateParameterInfos(die, template_param_infos)) { - clang::ClassTemplateDecl *class_template_decl = - m_ast.ParseClassTemplateDecl(decl_ctx, attrs.accessibility, - attrs.name.GetCString(), - tag_decl_kind, template_param_infos); - if (!class_template_decl) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" " - "clang::ClassTemplateDecl failed to return a decl.", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString()); - } - return TypeSP(); - } - - clang::ClassTemplateSpecializationDecl *class_specialization_decl = - m_ast.CreateClassTemplateSpecializationDecl( - decl_ctx, class_template_decl, tag_decl_kind, - template_param_infos); - clang_type = m_ast.CreateClassTemplateSpecializationType( - class_specialization_decl); - clang_type_was_created = true; - - m_ast.SetMetadata(class_template_decl, metadata); - 
m_ast.SetMetadata(class_specialization_decl, metadata); - } - } - - if (!clang_type_was_created) { - clang_type_was_created = true; - clang_type = m_ast.CreateRecordType( - decl_ctx, attrs.accessibility, attrs.name.GetCString(), - tag_decl_kind, attrs.class_language, &metadata); - } - } - - // Store a forward declaration to this class type in case any - // parameters in any class methods need it for the clang types for - // function prototypes. - LinkDeclContextToDIE(m_ast.GetDeclContextForType(clang_type), die); - type_sp = std::make_shared(die.GetID(), dwarf, attrs.name, - attrs.byte_size, nullptr, LLDB_INVALID_UID, - Type::eEncodingIsUID, &attrs.decl, clang_type, - Type::eResolveStateForward); - - type_sp->SetIsCompleteObjCClass(attrs.is_complete_objc_class); - - // Add our type to the unique type map so we don't end up creating many - // copies of the same type over and over in the ASTContext for our - // module - unique_ast_entry_up->m_type_sp = type_sp; - unique_ast_entry_up->m_die = die; - unique_ast_entry_up->m_declaration = unique_decl; - unique_ast_entry_up->m_byte_size = attrs.byte_size.getValueOr(0); - dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, - *unique_ast_entry_up); - - if (attrs.is_forward_declaration && die.HasChildren()) { - // Check to see if the DIE actually has a definition, some version of - // GCC will - // emit DIEs with DW_AT_declaration set to true, but yet still have - // subprogram, members, or inheritance, so we can't trust it - DWARFDIE child_die = die.GetFirstChild(); - while (child_die) { - switch (child_die.Tag()) { - case DW_TAG_inheritance: - case DW_TAG_subprogram: - case DW_TAG_member: - case DW_TAG_APPLE_property: - case DW_TAG_class_type: - case DW_TAG_structure_type: - case DW_TAG_enumeration_type: - case DW_TAG_typedef: - case DW_TAG_union_type: - child_die.Clear(); - attrs.is_forward_declaration = false; - break; - default: - child_die = child_die.GetSibling(); - break; - } - } - } - - if 
(!attrs.is_forward_declaration) { - // Always start the definition for a class type so that if the class - // has child classes or types that require the class to be created - // for use as their decl contexts the class will be ready to accept - // these child definitions. - if (!die.HasChildren()) { - // No children for this struct/union/class, lets finish it - if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); - } else { - dwarf->GetObjectFile()->GetModule()->ReportError( - "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " - "definition.\nPlease file a bug and attach the file at the " - "start of this error message", - die.GetOffset(), attrs.name.GetCString()); - } - - if (tag == DW_TAG_structure_type) // this only applies in C - { - clang::RecordDecl *record_decl = - ClangASTContext::GetAsRecordDecl(clang_type); - - if (record_decl) { - GetClangASTImporter().InsertRecordDecl( - record_decl, ClangASTImporter::LayoutInfo()); - } - } - } else if (clang_type_was_created) { - // Start the definition if the class is not objective C since the - // underlying decls respond to isCompleteDefinition(). Objective - // C decls don't respond to isCompleteDefinition() so we can't - // start the declaration definition right away. For C++ - // class/union/structs we want to start the definition in case the - // class is needed as the declaration context for a contained class - // or type without the need to complete that type.. - - if (attrs.class_language != eLanguageTypeObjC && - attrs.class_language != eLanguageTypeObjC_plus_plus) - ClangASTContext::StartTagDeclarationDefinition(clang_type); - - // Leave this as a forward declaration until we need to know the - // details of the type. lldb_private::Type will automatically call - // the SymbolFile virtual function - // "SymbolFileDWARF::CompleteType(Type *)" When the definition - // needs to be defined. 
- assert(!dwarf->GetForwardDeclClangTypeToDie().count( - ClangUtil::RemoveFastQualifiers(clang_type) - .GetOpaqueQualType()) && - "Type already in the forward declaration map!"); - // Can't assume m_ast.GetSymbolFile() is actually a - // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple - // binaries. - dwarf->GetForwardDeclDieToClangType()[die.GetDIE()] = - clang_type.GetOpaqueQualType(); - dwarf->GetForwardDeclClangTypeToDie() - [ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()] = - die.GetID(); - m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); - } - } - - // If we made a clang type, set the trivial abi if applicable: We only - // do this for pass by value - which implies the Trivial ABI. There - // isn't a way to assert that something that would normally be pass by - // value is pass by reference, so we ignore that attribute if set. - if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_value) { - clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); - if (record_decl && record_decl->getDefinition()) { - record_decl->setHasTrivialSpecialMemberForCall(); - } - } - - if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_reference) { - clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); - if (record_decl) - record_decl->setArgPassingRestrictions( - clang::RecordDecl::APK_CannotPassInRegs); - } - - } break; + assert((!type_sp && !clang_type) && + "Did not expect partially computed structure-like type"); + TypeSP struct_like_type_sp = ParseStructureLikeDIE(die, attrs); + return UpdateSymbolContextScopeForType(sc, die, struct_like_type_sp); + } case DW_TAG_enumeration_type: { if (attrs.is_forward_declaration) { @@ -1656,31 +1262,418 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, break; } - if (type_sp.get()) { - DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); - dw_tag_t 
sc_parent_tag = sc_parent_die.Tag(); + // TODO: We should consider making the switch above exhaustive to simplify + // control flow in ParseTypeFromDWARF. Then, we could simply replace this + // return statement with a call to llvm_unreachable. + return UpdateSymbolContextScopeForType(sc, die, type_sp); +} + +TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( + const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { + if (!type_sp) + return type_sp; + + SymbolFileDWARF *dwarf = die.GetDWARF(); + TypeList &type_list = dwarf->GetTypeList(); + DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); + dw_tag_t sc_parent_tag = sc_parent_die.Tag(); + + SymbolContextScope *symbol_context_scope = NULL; + if (sc_parent_tag == DW_TAG_compile_unit || + sc_parent_tag == DW_TAG_partial_unit) { + symbol_context_scope = sc.comp_unit; + } else if (sc.function != NULL && sc_parent_die) { + symbol_context_scope = + sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); + if (symbol_context_scope == NULL) + symbol_context_scope = sc.function; + } else { + symbol_context_scope = sc.module_sp.get(); + } + + if (symbol_context_scope != NULL) + type_sp->SetSymbolContextScope(symbol_context_scope); + + // We are ready to put this type into the uniqued list up at the module + // level. 
+ type_list.Insert(type_sp); - SymbolContextScope *symbol_context_scope = NULL; - if (sc_parent_tag == DW_TAG_compile_unit || - sc_parent_tag == DW_TAG_partial_unit) { - symbol_context_scope = sc.comp_unit; - } else if (sc.function != NULL && sc_parent_die) { - symbol_context_scope = - sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); - if (symbol_context_scope == NULL) - symbol_context_scope = sc.function; - } else - symbol_context_scope = sc.module_sp.get(); + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; +} - if (symbol_context_scope != NULL) { - type_sp->SetSymbolContextScope(symbol_context_scope); +TypeSP +DWARFASTParserClang::ParseStructureLikeDIE(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + TypeSP type_sp; + CompilerType clang_type; + const dw_tag_t tag = die.Tag(); + SymbolFileDWARF *dwarf = die.GetDWARF(); + LanguageType cu_language = die.GetLanguage(); + Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS); + + // UniqueDWARFASTType is large, so don't create a local variables on the + // stack, put it on the heap. This function is often called recursively and + // clang isn't good at sharing the stack space for variables in different + // blocks. + auto unique_ast_entry_up = std::make_unique(); + + ConstString unique_typename(attrs.name); + Declaration unique_decl(attrs.decl); + + if (attrs.name) { + if (Language::LanguageIsCPlusPlus(cu_language)) { + // For C++, we rely solely upon the one definition rule that says + // only one thing can exist at a given decl context. We ignore the + // file and line that things are declared on. 
+ std::string qualified_name; + if (die.GetQualifiedName(qualified_name)) + unique_typename = ConstString(qualified_name); + unique_decl.Clear(); } - // We are ready to put this type into the uniqued list up at the module - // level - type_list.Insert(type_sp); + if (dwarf->GetUniqueDWARFASTTypeMap().Find( + unique_typename, die, unique_decl, attrs.byte_size.getValueOr(-1), + *unique_ast_entry_up)) { + type_sp = unique_ast_entry_up->m_type_sp; + if (type_sp) { + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + LinkDeclContextToDIE( + GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die); + return type_sp; + } + } + } - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); + + int tag_decl_kind = -1; + AccessType default_accessibility = eAccessNone; + if (tag == DW_TAG_structure_type) { + tag_decl_kind = clang::TTK_Struct; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_union_type) { + tag_decl_kind = clang::TTK_Union; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_class_type) { + tag_decl_kind = clang::TTK_Class; + default_accessibility = eAccessPrivate; + } + + if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name && + !die.HasChildren() && cu_language == eLanguageTypeObjC) { + // Work around an issue with clang at the moment where forward + // declarations for objective C classes are emitted as: + // DW_TAG_structure_type [2] + // DW_AT_name( "ForwardObjcClass" ) + // DW_AT_byte_size( 0x00 ) + // DW_AT_decl_file( "..." ) + // DW_AT_decl_line( 1 ) + // + // Note that there is no DW_AT_declaration and there are no children, + // and the byte size is zero. 
+ attrs.is_forward_declaration = true; + } + + if (attrs.class_language == eLanguageTypeObjC || + attrs.class_language == eLanguageTypeObjC_plus_plus) { + if (!attrs.is_complete_objc_class && + die.Supports_DW_AT_APPLE_objc_complete_type()) { + // We have a valid eSymbolTypeObjCClass class symbol whose name + // matches the current objective C class that we are trying to find + // and this DIE isn't the complete definition (we checked + // is_complete_objc_class above and know it is false), so the real + // definition is in here somewhere + type_sp = + dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = + dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, + // see if we have a declaration anywhere else... + type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( + die, attrs.name, true); + } + } + + if (type_sp) { + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is an " + "incomplete objc type, complete type is 0x%8.8" PRIx64, + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), attrs.name.GetCString(), + type_sp->GetID()); + } + + // We found a real definition for this type elsewhere so lets use + // it and cache the fact that we found a complete type for this + // die + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; + } + } + } + + if (attrs.is_forward_declaration) { + // We have a forward declaration to a type and we need to try and + // find a full declaration. We look in the current type index just in + // case we have a forward declaration followed by an actual + // declarations in the DWARF. If this fails, we need to look + // elsewhere... 
+ if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " + "forward declaration, trying to find complete type", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString()); + } + + // See if the type comes from a DWO module and if so, track down that + // type. + type_sp = ParseTypeFromDWO(die, log); + if (type_sp) + return type_sp; + + DWARFDeclContext die_decl_ctx; + die.GetDWARFDeclContext(die_decl_ctx); + + // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die, + // type_name_const_str); + type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, see + // if we have a declaration anywhere else... + type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( + die_decl_ctx); + } + } + + if (type_sp) { + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " + "forward declaration, complete type is 0x%8.8" PRIx64, + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), attrs.name.GetCString(), + type_sp->GetID()); + } + + // We found a real definition for this type elsewhere so lets use + // it and cache the fact that we found a complete type for this die + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + clang::DeclContext *defn_decl_ctx = + GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); + if (defn_decl_ctx) + LinkDeclContextToDIE(defn_decl_ctx, die); + return type_sp; + } + } + assert(tag_decl_kind != -1); + bool clang_type_was_created = false; + clang_type.SetCompilerType( + &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); + if (!clang_type) { + clang::DeclContext *decl_ctx = + GetClangDeclContextContainingDIE(die, nullptr); + + // If 
your decl context is a record that was imported from another + // AST context (in the gmodules case), we need to make sure the type + // backing the Decl is complete before adding children to it. This is + // not an issue in the non-gmodules case because the debug info will + // always contain a full definition of parent types in that case. + CompleteExternalTagDeclType(GetClangASTImporter(), decl_ctx, die, + attrs.name.GetCString()); + + if (attrs.accessibility == eAccessNone && decl_ctx) { + // Check the decl context that contains this class/struct/union. If + // it is a class we must give it an accessibility. + const clang::Decl::Kind containing_decl_kind = decl_ctx->getDeclKind(); + if (DeclKindIsCXXClass(containing_decl_kind)) + attrs.accessibility = default_accessibility; + } + + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); + metadata.SetIsDynamicCXXType(dwarf->ClassOrStructIsVirtual(die)); + + if (attrs.name.GetStringRef().contains('<')) { + ClangASTContext::TemplateParameterInfos template_param_infos; + if (ParseTemplateParameterInfos(die, template_param_infos)) { + clang::ClassTemplateDecl *class_template_decl = + m_ast.ParseClassTemplateDecl(decl_ctx, attrs.accessibility, + attrs.name.GetCString(), tag_decl_kind, + template_param_infos); + if (!class_template_decl) { + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" " + "clang::ClassTemplateDecl failed to return a decl.", + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), attrs.name.GetCString()); + } + return TypeSP(); + } + + clang::ClassTemplateSpecializationDecl *class_specialization_decl = + m_ast.CreateClassTemplateSpecializationDecl( + decl_ctx, class_template_decl, tag_decl_kind, + template_param_infos); + clang_type = m_ast.CreateClassTemplateSpecializationType( + class_specialization_decl); + clang_type_was_created = true; + + m_ast.SetMetadata(class_template_decl, metadata); + 
m_ast.SetMetadata(class_specialization_decl, metadata); + } + } + + if (!clang_type_was_created) { + clang_type_was_created = true; + clang_type = m_ast.CreateRecordType( + decl_ctx, attrs.accessibility, attrs.name.GetCString(), tag_decl_kind, + attrs.class_language, &metadata); + } + } + + // Store a forward declaration to this class type in case any + // parameters in any class methods need it for the clang types for + // function prototypes. + LinkDeclContextToDIE(m_ast.GetDeclContextForType(clang_type), die); + type_sp = std::make_shared(die.GetID(), dwarf, attrs.name, + attrs.byte_size, nullptr, LLDB_INVALID_UID, + Type::eEncodingIsUID, &attrs.decl, + clang_type, Type::eResolveStateForward); + + type_sp->SetIsCompleteObjCClass(attrs.is_complete_objc_class); + + // Add our type to the unique type map so we don't end up creating many + // copies of the same type over and over in the ASTContext for our + // module + unique_ast_entry_up->m_type_sp = type_sp; + unique_ast_entry_up->m_die = die; + unique_ast_entry_up->m_declaration = unique_decl; + unique_ast_entry_up->m_byte_size = attrs.byte_size.getValueOr(0); + dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, + *unique_ast_entry_up); + + if (attrs.is_forward_declaration && die.HasChildren()) { + // Check to see if the DIE actually has a definition, some version of + // GCC will + // emit DIEs with DW_AT_declaration set to true, but yet still have + // subprogram, members, or inheritance, so we can't trust it + DWARFDIE child_die = die.GetFirstChild(); + while (child_die) { + switch (child_die.Tag()) { + case DW_TAG_inheritance: + case DW_TAG_subprogram: + case DW_TAG_member: + case DW_TAG_APPLE_property: + case DW_TAG_class_type: + case DW_TAG_structure_type: + case DW_TAG_enumeration_type: + case DW_TAG_typedef: + case DW_TAG_union_type: + child_die.Clear(); + attrs.is_forward_declaration = false; + break; + default: + child_die = child_die.GetSibling(); + break; + } + } + } + + if 
(!attrs.is_forward_declaration) { + // Always start the definition for a class type so that if the class + // has child classes or types that require the class to be created + // for use as their decl contexts the class will be ready to accept + // these child definitions. + if (!die.HasChildren()) { + // No children for this struct/union/class, lets finish it + if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { + ClangASTContext::CompleteTagDeclarationDefinition(clang_type); + } else { + dwarf->GetObjectFile()->GetModule()->ReportError( + "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " + "definition.\nPlease file a bug and attach the file at the " + "start of this error message", + die.GetOffset(), attrs.name.GetCString()); + } + + if (tag == DW_TAG_structure_type) // this only applies in C + { + clang::RecordDecl *record_decl = + ClangASTContext::GetAsRecordDecl(clang_type); + + if (record_decl) { + GetClangASTImporter().InsertRecordDecl( + record_decl, ClangASTImporter::LayoutInfo()); + } + } + } else if (clang_type_was_created) { + // Start the definition if the class is not objective C since the + // underlying decls respond to isCompleteDefinition(). Objective + // C decls don't respond to isCompleteDefinition() so we can't + // start the declaration definition right away. For C++ + // class/union/structs we want to start the definition in case the + // class is needed as the declaration context for a contained class + // or type without the need to complete that type.. + + if (attrs.class_language != eLanguageTypeObjC && + attrs.class_language != eLanguageTypeObjC_plus_plus) + ClangASTContext::StartTagDeclarationDefinition(clang_type); + + // Leave this as a forward declaration until we need to know the + // details of the type. lldb_private::Type will automatically call + // the SymbolFile virtual function + // "SymbolFileDWARF::CompleteType(Type *)" When the definition + // needs to be defined. 
+ assert(!dwarf->GetForwardDeclClangTypeToDie().count( + ClangUtil::RemoveFastQualifiers(clang_type) + .GetOpaqueQualType()) && + "Type already in the forward declaration map!"); + // Can't assume m_ast.GetSymbolFile() is actually a + // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple + // binaries. + dwarf->GetForwardDeclDieToClangType()[die.GetDIE()] = + clang_type.GetOpaqueQualType(); + dwarf->GetForwardDeclClangTypeToDie() + [ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()] = + die.GetID(); + m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); + } + } + + // If we made a clang type, set the trivial abi if applicable: We only + // do this for pass by value - which implies the Trivial ABI. There + // isn't a way to assert that something that would normally be pass by + // value is pass by reference, so we ignore that attribute if set. + if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_value) { + clang::CXXRecordDecl *record_decl = + m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); + if (record_decl && record_decl->getDefinition()) { + record_decl->setHasTrivialSpecialMemberForCall(); + } + } + + if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_reference) { + clang::CXXRecordDecl *record_decl = + m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); + if (record_decl) + record_decl->setArgPassingRestrictions( + clang::RecordDecl::APK_CannotPassInRegs); } return type_sp; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index c0c32c5bba6cf..106f9254a4495 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -15,7 +15,10 @@ #include "llvm/ADT/SmallVector.h" #include "DWARFASTParser.h" +#include "DWARFDIE.h" #include "DWARFDefines.h" +#include "DWARFFormValue.h" +#include "LogChannelDWARF.h" #include 
"lldb/Core/ClangForward.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Symbol/ClangASTContext.h" @@ -29,6 +32,8 @@ class CompileUnit; class DWARFDebugInfoEntry; class SymbolFileDWARF; +struct ParsedDWARFTypeAttributes; + class DWARFASTParserClang : public DWARFASTParser { public: DWARFASTParserClang(lldb_private::ClangASTContext &ast); @@ -37,7 +42,7 @@ class DWARFASTParserClang : public DWARFASTParser { // DWARFASTParser interface. lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb_private::Log *log, + const DWARFDIE &die, bool *type_is_new_ptr) override; lldb_private::Function * @@ -122,6 +127,10 @@ class DWARFASTParserClang : public DWARFASTParser { bool is_signed, uint32_t enumerator_byte_size, const DWARFDIE &parent_die); + /// Parse a structure, class, or union type DIE. + lldb::TypeSP ParseStructureLikeDIE(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb_private::Type *GetTypeForDIE(const DWARFDIE &die); clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); @@ -142,6 +151,14 @@ class DWARFASTParserClang : public DWARFASTParser { void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); + /// If \p type_sp is valid, calculate and set its symbol context scope, and + /// update the type list for its backing symbol file. + /// + /// Returns \p type_sp. + lldb::TypeSP + UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, lldb::TypeSP type_sp); + lldb::TypeSP ParseTypeFromDWO(const DWARFDIE &die, lldb_private::Log *log); // Return true if this type is a declaration to a type in an external @@ -149,4 +166,37 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); }; +/// Parsed form of all attributes that are relevant for type reconstruction. 
+/// Some attributes are relevant for all kinds of types (declaration), while +/// others are only meaningful to a specific type (is_virtual) +struct ParsedDWARFTypeAttributes { + explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + + lldb::AccessType accessibility = lldb::eAccessNone; + bool is_artificial = false; + bool is_complete_objc_class = false; + bool is_explicit = false; + bool is_forward_declaration = false; + bool is_inline = false; + bool is_scoped_enum = false; + bool is_vector = false; + bool is_virtual = false; + clang::StorageClass storage = clang::SC_None; + const char *mangled_name = nullptr; + lldb_private::ConstString name; + lldb_private::Declaration decl; + DWARFDIE object_pointer; + DWARFFormValue abstract_origin; + DWARFFormValue containing_type; + DWARFFormValue signature; + DWARFFormValue specification; + DWARFFormValue type; + lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; + llvm::Optional byte_size; + size_t calling_convention = llvm::dwarf::DW_CC_normal; + uint32_t bit_stride = 0; + uint32_t byte_stride = 0; + uint32_t encoding = 0; +}; + #endif // SymbolFileDWARF_DWARFASTParserClang_h_ diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 1db516387c67d..863cf2c1f0d7c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3001,8 +3001,7 @@ TypeSP SymbolFileDWARF::ParseType(const SymbolContext &sc, const DWARFDIE &die, if (!dwarf_ast) return {}; - Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO); - TypeSP type_sp = dwarf_ast->ParseTypeFromDWARF(sc, die, log, type_is_new_ptr); + TypeSP type_sp = dwarf_ast->ParseTypeFromDWARF(sc, die, type_is_new_ptr); if (type_sp) { GetTypeList().Insert(type_sp); From e4cec2d3c73f7b30edc03b6b8219273fc81fc477 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Mon, 7 Oct 2019 17:24:23 +0000 Subject: [PATCH 
131/254] [libTooling][NFC] Fix build break in r373916. r373916 used raw strings inside macro calls, which breaks some builds. llvm-svn: 373928 --- clang/unittests/Tooling/StencilTest.cpp | 36 +++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp index 877193bf70a1c..0bd3d90f83fe6 100644 --- a/clang/unittests/Tooling/StencilTest.cpp +++ b/clang/unittests/Tooling/StencilTest.cpp @@ -392,56 +392,64 @@ TEST(StencilEqualityTest, InEqualityRun) { TEST(StencilToStringTest, RawTextOp) { auto S = cat("foo bar baz"); - EXPECT_EQ(S.toString(), R"("foo bar baz")"); + StringRef Expected = R"("foo bar baz")"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, RawTextOpEscaping) { auto S = cat("foo \"bar\" baz\\n"); - EXPECT_EQ(S.toString(), R"("foo \"bar\" baz\\n")"); + StringRef Expected = R"("foo \"bar\" baz\\n")"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, DebugPrintNodeOp) { auto S = cat(dPrint("Id")); - EXPECT_EQ(S.toString(), R"repr(dPrint("Id"))repr"); + StringRef Expected = R"repr(dPrint("Id"))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, ExpressionOp) { auto S = cat(expression("Id")); - EXPECT_EQ(S.toString(), R"repr(expression("Id"))repr"); + StringRef Expected = R"repr(expression("Id"))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, DerefOp) { auto S = cat(deref("Id")); - EXPECT_EQ(S.toString(), R"repr(deref("Id"))repr"); + StringRef Expected = R"repr(deref("Id"))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, AddressOfOp) { auto S = cat(addressOf("Id")); - EXPECT_EQ(S.toString(), R"repr(addressOf("Id"))repr"); + StringRef Expected = R"repr(addressOf("Id"))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, AccessOp) { auto S = cat(access("Id", text("memberData"))); - EXPECT_EQ(S.toString(), R"repr(access("Id", 
"memberData"))repr"); + StringRef Expected = R"repr(access("Id", "memberData"))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, AccessOpStencilPart) { auto S = cat(access("Id", access("subId", "memberData"))); - EXPECT_EQ(S.toString(), - R"repr(access("Id", access("subId", "memberData")))repr"); + StringRef Expected = R"repr(access("Id", access("subId", "memberData")))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, IfBoundOp) { auto S = cat(ifBound("Id", text("trueText"), access("exprId", "memberData"))); - EXPECT_EQ( - S.toString(), - R"repr(ifBound("Id", "trueText", access("exprId", "memberData")))repr"); + StringRef Expected = + R"repr(ifBound("Id", "trueText", access("exprId", "memberData")))repr"; + EXPECT_EQ(S.toString(), Expected); } TEST(StencilToStringTest, MultipleOp) { auto S = cat("foo", access("x", "m()"), "bar", ifBound("x", text("t"), access("e", "f"))); - EXPECT_EQ(S.toString(), R"repr("foo", access("x", "m()"), "bar", )repr" - R"repr(ifBound("x", "t", access("e", "f")))repr"); + StringRef Expected = R"repr("foo", access("x", "m()"), "bar", )repr" + R"repr(ifBound("x", "t", access("e", "f")))repr"; + EXPECT_EQ(S.toString(), Expected); } } // namespace From 8a410bcef02c25b6b3a49fa6483875eb64539aad Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 7 Oct 2019 17:28:03 +0000 Subject: [PATCH 132/254] Fix Calling Convention through aliases r369697 changed the behavior of stripPointerCasts to no longer include aliases. However, the code in CGDeclCXX.cpp's createAtExitStub counted on the looking through aliases to properly set the calling convention of a call. The result of the change was that the calling convention mismatch of the call would be replaced with a llvm.trap, causing a runtime crash. 
Differential Revision: https://reviews.llvm.org/D68584 llvm-svn: 373929 --- clang/lib/CodeGen/CGDeclCXX.cpp | 4 ++-- .../test/CodeGenCXX/call-conv-thru-alias.cpp | 21 +++++++++++++++++++ llvm/include/llvm/IR/Value.h | 10 +++++++++ llvm/lib/IR/Value.cpp | 8 +++++++ 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenCXX/call-conv-thru-alias.cpp diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index a54e5dcfda2d7..bf16b7bec4b19 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -248,8 +248,8 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. - if (llvm::Function *dtorFn = - dyn_cast(dtor.getCallee()->stripPointerCasts())) + if (auto *dtorFn = dyn_cast( + dtor.getCallee()->stripPointerCastsAndAliases())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); diff --git a/clang/test/CodeGenCXX/call-conv-thru-alias.cpp b/clang/test/CodeGenCXX/call-conv-thru-alias.cpp new file mode 100644 index 0000000000000..5bd5c7dbc5caf --- /dev/null +++ b/clang/test/CodeGenCXX/call-conv-thru-alias.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple i686-windows-pc -emit-llvm -o - -mconstructor-aliases -O1 -disable-llvm-passes %s | FileCheck %s + +struct Base { virtual ~Base(); }; +struct Derived : Base { + virtual ~Derived(); + static Derived inst; +}; + +Base::~Base(){} +Derived::~Derived(){} +Derived Derived::inst; + +// CHECK: @"??1Derived@@UAE@XZ" = dso_local unnamed_addr alias void (%struct.Derived*), bitcast (void (%struct.Base*)* @"??1Base@@UAE@XZ" to void (%struct.Derived*)*) + +// CHECK: define dso_local x86_thiscallcc void @"??1Base@@UAE@XZ" +// CHECK: define internal void @"??__E?inst@Derived@@2U1@A@@YAXXZ" +// CHECK: call i32 @atexit(void ()* @"??__F?inst@Derived@@2U1@A@@YAXXZ" +// +// CHECK: define internal void 
@"??__F?inst@Derived@@2U1@A@@YAXXZ" +// CHECK-NEXT: entry: +// CHECK-NEXT: call x86_thiscallcc void @"??1Derived@@UAE@XZ" diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 03d2ce4d7f88f..58502907f0e35 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -523,6 +523,16 @@ class Value { static_cast(this)->stripPointerCasts()); } + /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. + const Value *stripPointerCastsAndAliases() const; + Value *stripPointerCastsAndAliases() { + return const_cast( + static_cast(this)->stripPointerCastsAndAliases()); + } + /// Strip off pointer casts, all-zero GEPs and address space casts /// but ensures the representation of the result stays the same. /// diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 65b98d382cc47..c44d4b4f2bcc0 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -455,6 +455,7 @@ namespace { // Various metrics for how much to strip off of pointers. enum PointerStripKind { PSK_ZeroIndices, + PSK_ZeroIndicesAndAliases, PSK_ZeroIndicesSameRepresentation, PSK_ZeroIndicesAndInvariantGroups, PSK_InBoundsConstantIndices, @@ -475,6 +476,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { if (auto *GEP = dyn_cast(V)) { switch (StripKind) { case PSK_ZeroIndices: + case PSK_ZeroIndicesAndAliases: case PSK_ZeroIndicesSameRepresentation: case PSK_ZeroIndicesAndInvariantGroups: if (!GEP->hasAllZeroIndices()) @@ -497,6 +499,8 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { // TODO: If we know an address space cast will not change the // representation we could look through it here as well. 
V = cast(V)->getOperand(0); + } else if (StripKind == PSK_ZeroIndicesAndAliases && isa(V)) { + V = cast(V)->getAliasee(); } else { if (const auto *Call = dyn_cast(V)) { if (const Value *RV = Call->getReturnedArgOperand()) { @@ -526,6 +530,10 @@ const Value *Value::stripPointerCasts() const { return stripPointerCastsAndOffsets(this); } +const Value *Value::stripPointerCastsAndAliases() const { + return stripPointerCastsAndOffsets(this); +} + const Value *Value::stripPointerCastsSameRepresentation() const { return stripPointerCastsAndOffsets(this); } From bebdab63e84ce058ee4ff8b37de48b73197ae24e Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Mon, 7 Oct 2019 17:37:39 +0000 Subject: [PATCH 133/254] [scudo][standalone] Correct releaseToOS behavior Summary: There was an issue in `releaseToOSMaybe`: one of the criteria to decide if we should proceed with the release was wrong. Namely: ``` const uptr N = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; if (N * BlockSize < PageSize) return; // No chance to release anything. ``` I meant to check if the amount of bytes in the free list was lower than a page, but this actually checks if the amount of **in use** bytes was lower than a page. The correct code is: ``` const uptr BytesInFreeList = Region->AllocatedUser - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; if (BytesInFreeList < PageSize) return 0; // No chance to release anything. ``` Consequences of the bug: - if a class size has less than a page worth of in-use bytes (allocated or in a cache), reclaiming would not occur, whatever the amount of blocks in the free list; in real world scenarios this is unlikely to happen and be impactful; - if a class size had less than a page worth of free bytes (and enough in-use bytes, etc), then reclaiming would be attempted, with likely no result. This means the reclaiming was overzealous at times. 
I didn't have a good way to test for this, so I changed the prototype of the function to return the number of bytes released, allowing to get the information needed. The test added fails with the initial criteria. Another issue is that `ReleaseToOsInterval` can actually be 0, meaning we always try to release (side note: it's terrible for performances). so change a `> 0` check to `>= 0`. Additionally, decrease the `CanRelease` threshold to `PageSize / 32`. I still have to make that configurable but I will do it at another time. Finally, rename some variables in `printStats`: I feel like "available" was too ambiguous, so change it to "total". Reviewers: morehouse, hctim, eugenis, vitalybuka, cferris Reviewed By: morehouse Subscribers: delcypher, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D68471 llvm-svn: 373930 --- compiler-rt/lib/scudo/standalone/primary32.h | 29 +++++++++------ compiler-rt/lib/scudo/standalone/primary64.h | 36 ++++++++++--------- .../scudo/standalone/tests/primary_test.cpp | 29 +++++++++++++++ 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 79a11bfc1b83d..a9fbb70bccf8d 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -72,9 +72,9 @@ template class SizeClassAllocator32 { SizeClassInfo *Sci = getSizeClassInfo(I); Sci->RandState = getRandomU32(&Seed); // See comment in the 64-bit primary about releasing smaller size classes. 
- Sci->CanRelease = (ReleaseToOsInterval > 0) && + Sci->CanRelease = (ReleaseToOsInterval >= 0) && (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 16)); + (getSizeByClassId(I) >= (PageSize / 32)); } ReleaseToOsIntervalMs = ReleaseToOsInterval; } @@ -161,14 +161,16 @@ template class SizeClassAllocator32 { printStats(I, 0); } - void releaseToOS() { + uptr releaseToOS() { + uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); - releaseToOSMaybe(Sci, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); } + return TotalReleasedBytes; } private: @@ -339,35 +341,38 @@ template class SizeClassAllocator32 { AvailableChunks, Rss >> 10); } - NOINLINE void releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, + NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, bool Force = false) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); CHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks); - const uptr N = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; - if (N * BlockSize < PageSize) - return; // No chance to release anything. + const uptr BytesInFreeList = + Sci->AllocatedUser - + (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; + if (BytesInFreeList < PageSize) + return 0; // No chance to release anything. if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * BlockSize < PageSize) { - return; // Nothing new to release. + return 0; // Nothing new to release. } if (!Force) { const s32 IntervalMs = ReleaseToOsIntervalMs; if (IntervalMs < 0) - return; + return 0; if (Sci->ReleaseInfo.LastReleaseAtNs + static_cast(IntervalMs) * 1000000ULL > getMonotonicTime()) { - return; // Memory was returned recently. + return 0; // Memory was returned recently. 
} } // TODO(kostyak): currently not ideal as we loop over all regions and // iterate multiple times over the same freelist if a ClassId spans multiple // regions. But it will have to do for now. + uptr TotalReleasedBytes = 0; for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { if (PossibleRegions[I] == ClassId) { ReleaseRecorder Recorder(I * RegionSize); @@ -377,10 +382,12 @@ template class SizeClassAllocator32 { Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; } } } Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + return TotalReleasedBytes; } SizeClassInfo SizeClassInfoArray[NumClasses]; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 96fd1e6d55f2b..f56387b05004c 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -79,9 +79,9 @@ template class SizeClassAllocator64 { // memory accesses which ends up being fairly costly. The current lower // limit is mostly arbitrary and based on empirical observations. 
// TODO(kostyak): make the lower limit a runtime option - Region->CanRelease = (ReleaseToOsInterval > 0) && + Region->CanRelease = (ReleaseToOsInterval >= 0) && (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 16)); + (getSizeByClassId(I) >= (PageSize / 32)); Region->RandState = getRandomU32(&Seed); } ReleaseToOsIntervalMs = ReleaseToOsInterval; @@ -167,14 +167,16 @@ template class SizeClassAllocator64 { printStats(I, 0); } - void releaseToOS() { + uptr releaseToOS() { + uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; RegionInfo *Region = getRegionInfo(I); ScopedLock L(Region->Mutex); - releaseToOSMaybe(Region, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); } + return TotalReleasedBytes; } private: @@ -259,7 +261,7 @@ template class SizeClassAllocator64 { const uptr MappedUser = Region->MappedUser; const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size; // Map more space for blocks, if necessary. - if (LIKELY(TotalUserBytes > MappedUser)) { + if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. const uptr UserMapSize = roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement); @@ -325,43 +327,44 @@ template class SizeClassAllocator64 { if (Region->MappedUser == 0) return; const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; - const uptr AvailableChunks = - Region->AllocatedUser / getSizeByClassId(ClassId); + const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId); Printf("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu inuse: " - "%6zu avail: %6zu rss: %6zuK releases: %6zu last released: %6zuK " + "%6zu total: %6zu rss: %6zuK releases: %6zu last released: %6zuK " "region: 0x%zx (0x%zx)\n", Region->Exhausted ? 
"F" : " ", ClassId, getSizeByClassId(ClassId), Region->MappedUser >> 10, Region->Stats.PoppedBlocks, - Region->Stats.PushedBlocks, InUse, AvailableChunks, Rss >> 10, + Region->Stats.PushedBlocks, InUse, TotalChunks, Rss >> 10, Region->ReleaseInfo.RangesReleased, Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg, getRegionBaseByClassId(ClassId)); } - NOINLINE void releaseToOSMaybe(RegionInfo *Region, uptr ClassId, + NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, bool Force = false) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); - const uptr N = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; - if (N * BlockSize < PageSize) - return; // No chance to release anything. + const uptr BytesInFreeList = + Region->AllocatedUser - + (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; + if (BytesInFreeList < PageSize) + return 0; // No chance to release anything. if ((Region->Stats.PushedBlocks - Region->ReleaseInfo.PushedBlocksAtLastRelease) * BlockSize < PageSize) { - return; // Nothing new to release. + return 0; // Nothing new to release. } if (!Force) { const s32 IntervalMs = ReleaseToOsIntervalMs; if (IntervalMs < 0) - return; + return 0; if (Region->ReleaseInfo.LastReleaseAtNs + static_cast(IntervalMs) * 1000000ULL > getMonotonicTime()) { - return; // Memory was returned recently. + return 0; // Memory was returned recently. 
} } @@ -377,6 +380,7 @@ template class SizeClassAllocator64 { Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); } Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + return Recorder.getReleasedBytes(); } }; diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 329a4c11953cf..a6cfc6bdb1b79 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -188,3 +188,32 @@ TEST(ScudoPrimaryTest, PrimaryThreaded) { testPrimaryThreaded>(); testPrimaryThreaded>(); } + +// Through a simple allocation that spans two pages, verify that releaseToOS +// actually releases some bytes (at least one page worth). This is a regression +// test for an error in how the release criteria were computed. +template static void testReleaseToOS() { + auto Deleter = [](Primary *P) { + P->unmapTestOnly(); + delete P; + }; + std::unique_ptr Allocator(new Primary, Deleter); + Allocator->init(/*ReleaseToOsInterval=*/-1); + typename Primary::CacheT Cache; + Cache.init(nullptr, Allocator.get()); + const scudo::uptr Size = scudo::getPageSizeCached() * 2; + EXPECT_TRUE(Primary::canAllocate(Size)); + const scudo::uptr ClassId = + Primary::SizeClassMap::getClassIdBySize(Size); + void *P = Cache.allocate(ClassId); + EXPECT_NE(P, nullptr); + Cache.deallocate(ClassId, P); + Cache.destroy(nullptr); + EXPECT_GT(Allocator->releaseToOS(), 0U); +} + +TEST(ScudoPrimaryTest, ReleaseToOS) { + using SizeClassMap = scudo::DefaultSizeClassMap; + testReleaseToOS>(); + testReleaseToOS>(); +} From 6e1a0cf46bae77e1a83416f9479884cbb5e0164a Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Mon, 7 Oct 2019 17:49:32 +0000 Subject: [PATCH 134/254] [platform process list] add a flag for showing the processes of all users Summary: For context: https://reviews.llvm.org/D68293 We need a way to show all the processes on android regardless of the user 
id. When you run `platform process list`, you only see the processes with the same user as the user that launched lldb-server. However, it's quite useful to see all the processes, though, and it will lay a foundation for full apk debugging support from lldb. Before: ``` PID PARENT USER TRIPLE NAME ====== ====== ========== ======================== ============================ 3234 1 aarch64-unknown-linux-android adbd 8034 3234 aarch64-unknown-linux-android sh 9096 3234 aarch64-unknown-linux-android sh 9098 9096 aarch64-unknown-linux-android lldb-server (lldb) ^D ``` Now: ``` (lldb) platform process list -x 205 matching processes were found on "remote-android" PID PARENT USER TRIPLE NAME ====== ====== ========== ======================== ============================ 1 0 init 524 1 init 525 1 init 531 1 ueventd 568 1 logd 569 1 aarch64-unknown-linux-android servicemanager 570 1 aarch64-unknown-linux-android hwservicemanager 571 1 aarch64-unknown-linux-android vndservicemanager 577 1 aarch64-unknown-linux-android qseecomd 580 577 aarch64-unknown-linux-android qseecomd ... 
23816 979 com.android.providers.calendar 24600 979 com.verizon.mips.services 27888 979 com.hualai 28043 2378 com.android.chrome:sandboxed_process0 31449 979 com.att.shm 31779 979 com.samsung.android.authfw 31846 979 com.samsung.android.server.iris 32014 979 com.samsung.android.MtpApplication 32045 979 com.samsung.InputEventApp ``` Reviewers: labath,xiaobai,aadsm,clayborg Subscribers: llvm-svn: 373931 --- .../gdb_remote_client/TestPlatformClient.py | 34 +++++++++++++++++++ .../gdb_remote_client/gdbclientutils.py | 28 +++++++++++++-- .../source/Commands/CommandObjectPlatform.cpp | 4 +++ lldb/source/Commands/Options.td | 3 ++ .../GDBRemoteCommunicationClient.cpp | 3 +- 5 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py new file mode 100644 index 0000000000000..d0087770256ec --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py @@ -0,0 +1,34 @@ +import lldb +import binascii +import os +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from gdbclientutils import * + + +class TestPlatformClient(GDBRemoteTestBase): + + def test_process_list_with_all_users(self): + """Test connecting to a remote linux platform""" + + class MyResponder(MockGDBServerResponder): + def qfProcessInfo(self, packet): + if "all_users:1" in packet: + return "pid:10;ppid:1;uid:1;gid:1;euid:1;egid:1;name:" + binascii.hexlify("/a/process") + ";args:" + else: + return "E04" + + self.server.responder = MyResponder() + + self.runCmd("platform select remote-linux") + + try: + self.runCmd("platform connect connect://localhost:%d" % + self.server.port) + self.assertTrue(self.dbg.GetSelectedPlatform().IsConnected()) + 
self.expect("platform process list -x", + startstr="1 matching process was found", endstr="process" + os.linesep) + self.expect("platform process list", + error="error: no processes were found on the \"remote-linux\" platform") + finally: + self.runCmd("platform disconnect") diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py index fad41f8a83a40..73c698d1e3e8f 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py @@ -160,9 +160,34 @@ def respond(self, packet): return self.QListThreadsInStopReply() if packet.startswith("qMemoryRegionInfo:"): return self.qMemoryRegionInfo() + if packet == "qQueryGDBServer": + return self.qQueryGDBServer() + if packet == "qHostInfo": + return self.qHostInfo() + if packet == "qGetWorkingDir": + return self.qGetWorkingDir() + if packet == "qsProcessInfo": + return self.qsProcessInfo() + if packet.startswith("qfProcessInfo"): + return self.qfProcessInfo(packet) return self.other(packet) + def qsProcessInfo(self): + return "E04" + + def qfProcessInfo(self, packet): + raise "E04" + + def qGetWorkingDir(self): + return "2f" + + def qHostInfo(self): + return "ptrsize:8;endian:little;" + + def qQueryGDBServer(self): + return "E04" + def interrupt(self): raise self.UnexpectedPacketException() @@ -171,7 +196,7 @@ def cont(self): def vCont(self, packet): raise self.UnexpectedPacketException() - + def readRegisters(self): return "00000000" * self.registerCount @@ -425,7 +450,6 @@ def _handlePacket(self, packet): class InvalidPacketException(Exception): pass - class GDBRemoteTestBase(TestBase): """ Base class for GDB client tests. 
diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index 7fb37839be714..fbd13aa37bdab 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -1264,6 +1264,10 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed { verbose = true; break; + case 'x': + match_info.SetMatchAllUsers(true); + break; + default: llvm_unreachable("Unimplemented option"); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 477b553294688..87f5506c305fb 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -591,6 +591,9 @@ let Command = "platform process list" in { def platform_process_list_show_args : Option<"show-args", "A">, GroupRange<1, 6>, Desc<"Show process arguments instead of the process executable basename.">; + def platform_process_list_all_users: Option<"all-users", "x">, + GroupRange<1,6>, + Desc<"Show processes matching all user IDs.">; def platform_process_list_verbose : Option<"verbose", "v">, GroupRange<1, 6>, Desc<"Enable verbose output.">; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 63d0522da3b08..7cb9ce0f52f33 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2176,8 +2176,7 @@ uint32_t GDBRemoteCommunicationClient::FindProcesses( if (match_info.GetProcessInfo().EffectiveGroupIDIsValid()) packet.Printf("egid:%u;", match_info.GetProcessInfo().GetEffectiveGroupID()); - if (match_info.GetProcessInfo().EffectiveGroupIDIsValid()) - packet.Printf("all_users:%u;", match_info.GetMatchAllUsers() ? 1 : 0); + packet.Printf("all_users:%u;", match_info.GetMatchAllUsers() ? 
1 : 0); if (match_info.GetProcessInfo().GetArchitecture().IsValid()) { const ArchSpec &match_arch = match_info.GetProcessInfo().GetArchitecture(); From 29f7e17cb8b60ecfe4af313cc22fea7a8feb156c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 17:55:05 +0000 Subject: [PATCH 135/254] Try to get clangd tests passing on Windows. Part of PR43592. See also r328645. llvm-svn: 373932 --- .../clangd/test/semantic-highlighting.test | 8 ++++---- clang-tools-extra/clangd/test/type-hierarchy.test | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/test/semantic-highlighting.test b/clang-tools-extra/clangd/test/semantic-highlighting.test index d802efbc052ba..c80446973764d 100644 --- a/clang-tools-extra/clangd/test/semantic-highlighting.test +++ b/clang-tools-extra/clangd/test/semantic-highlighting.test @@ -65,7 +65,7 @@ # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "textDocument": { -# CHECK-NEXT: "uri": "file:///clangd-test/foo.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/foo.cpp" # CHECK-NEXT: } # CHECK-NEXT: } # CHECK-NEXT:} @@ -84,7 +84,7 @@ # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "textDocument": { -# CHECK-NEXT: "uri": "file:///clangd-test/foo2.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/foo2.cpp" # CHECK-NEXT: } # CHECK-NEXT: } # CHECK-NEXT:} @@ -99,7 +99,7 @@ # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "textDocument": { -# CHECK-NEXT: "uri": "file:///clangd-test/foo.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/foo.cpp" # CHECK-NEXT: } # CHECK-NEXT: } # CHECK-NEXT:} @@ -114,7 +114,7 @@ # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "textDocument": { -# CHECK-NEXT: "uri": "file:///clangd-test/foo.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/foo.cpp" # CHECK-NEXT: } # CHECK-NEXT: } # CHECK-NEXT:} diff --git a/clang-tools-extra/clangd/test/type-hierarchy.test b/clang-tools-extra/clangd/test/type-hierarchy.test index b2e78ae249dcc..272fb71f3ab82 100644 --- 
a/clang-tools-extra/clangd/test/type-hierarchy.test +++ b/clang-tools-extra/clangd/test/type-hierarchy.test @@ -32,7 +32,7 @@ # CHECK-NEXT: "line": 3 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "kind": 23, @@ -66,7 +66,7 @@ # CHECK-NEXT: "line": 0 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "range": { @@ -89,7 +89,7 @@ # CHECK-NEXT: "line": 1 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "range": { @@ -112,7 +112,7 @@ # CHECK-NEXT: "line": 2 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } --- {"jsonrpc":"2.0","id":2,"method":"typeHierarchy/resolve","params":{"item":{"uri":"test:///main.cpp","data":"A6576FE083F2949A","name":"Child3","kind":23,"range":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}},"selectionRange":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}}},"direction":0,"resolve":1}} @@ -144,7 +144,7 @@ # CHECK-NEXT: "line": 4 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "data": "A6576FE083F2949A", @@ -170,7 +170,7 @@ # CHECK-NEXT: "line": 3 # CHECK-NEXT: } # CHECK-NEXT: }, -# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: "uri": "file://{{.*}}/clangd-test/main.cpp" # CHECK-NEXT: } --- {"jsonrpc":"2.0","id":3,"method":"shutdown"} From f385a381404797f5c44298d34c4ba804fad556d3 Mon Sep 17 00:00:00 2001 From: Louis 
Dionne Date: Mon, 7 Oct 2019 18:12:10 +0000 Subject: [PATCH 136/254] [libc++abi] Remove redundant link flags on Apple platforms These flags are already set when we create the cxxabi_shared target using the SOVERSION and VERSION target properties, and the install_name was already being overridden to '@rpath/libc++abi.1.dylib' by CMake because no 'CMAKE_INSTALL_NAME_DIR' option was specified. So this is effectively a removal of dead code with no intended functionality change. The only thing we're losing here is that we used to link against libSystem.B.dylib instead of libSystem.dylib when building libc++abi for macOS 10.6 -- however, I strongly suspect nobody's building libc++abi from source for that target anymore. llvm-svn: 373934 --- libcxxabi/src/CMakeLists.txt | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index cbf4b21c31f11..8d4eb795170ea 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -103,23 +103,7 @@ endif () # Setup flags.
add_link_flags_if_supported(-nodefaultlibs) -set(LIBCXXABI_SHARED_LINK_FLAGS) - if ( APPLE ) - if ( CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "10.6" ) - list(APPEND LIBCXXABI_COMPILE_FLAGS "-U__STRICT_ANSI__") - list(APPEND LIBCXXABI_SHARED_LINK_FLAGS - "-compatibility_version 1" - "-current_version 1" - "-install_name /usr/lib/libc++abi.1.dylib") - list(APPEND LIBCXXABI_LINK_FLAGS - "/usr/lib/libSystem.B.dylib") - else() - list(APPEND LIBCXXABI_SHARED_LINK_FLAGS - "-compatibility_version 1" - "-install_name /usr/lib/libc++abi.1.dylib") - endif() - if (LLVM_USE_SANITIZER) if (("${LLVM_USE_SANITIZER}" STREQUAL "Address") OR ("${LLVM_USE_SANITIZER}" STREQUAL "Address;Undefined") OR @@ -148,7 +132,6 @@ endif() split_list(LIBCXXABI_COMPILE_FLAGS) split_list(LIBCXXABI_LINK_FLAGS) -split_list(LIBCXXABI_SHARED_LINK_FLAGS) # FIXME: libc++abi.so will not link when modules are enabled because it depends # on symbols defined in libc++.so which has not yet been built. @@ -187,7 +170,7 @@ if (LIBCXXABI_ENABLE_SHARED) COMPILE_FLAGS "${LIBCXXABI_COMPILE_FLAGS}" LINK_FLAGS - "${LIBCXXABI_LINK_FLAGS} ${LIBCXXABI_SHARED_LINK_FLAGS}" + "${LIBCXXABI_LINK_FLAGS}" OUTPUT_NAME "c++abi" SOVERSION From fdaa74217420729140f1786ea037ac445a724c8e Mon Sep 17 00:00:00 2001 From: Jordan Rose Date: Mon, 7 Oct 2019 18:14:24 +0000 Subject: [PATCH 137/254] Second attempt to add iterator_range::empty() Doing this makes MSVC complain that `empty(someRange)` could refer to either C++17's std::empty or LLVM's llvm::empty, which previously we avoided via SFINAE because std::empty is defined in terms of an empty member rather than begin and end. So, switch callers over to the new method as it is added. 
https://reviews.llvm.org/D68439 llvm-svn: 373935 --- llvm/include/llvm/ADT/iterator_range.h | 1 + llvm/lib/Analysis/LazyCallGraph.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 4 ++-- .../GlobalISel/InstructionSelector.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 2 +- .../CodeGen/GlobalISel/RegisterBankInfo.cpp | 2 +- llvm/lib/CodeGen/MachineModuleInfo.cpp | 4 ++-- .../ExecutionEngine/Orc/ExecutionUtils.cpp | 2 +- llvm/lib/IR/DebugInfo.cpp | 2 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 22 +++++++++---------- .../Target/BPF/BPFAbstractMemberAccess.cpp | 2 +- llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 2 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2 +- llvm/lib/Transforms/IPO/PartialInlining.cpp | 4 ++-- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- llvm/lib/Transforms/Scalar/NewGVN.cpp | 2 +- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 4 ++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 19 files changed, 33 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h index 774c7c4e3366e..aa8830943cabc 100644 --- a/llvm/include/llvm/ADT/iterator_range.h +++ b/llvm/include/llvm/ADT/iterator_range.h @@ -44,6 +44,7 @@ class iterator_range { IteratorT begin() const { return begin_iterator; } IteratorT end() const { return end_iterator; } + bool empty() const { return begin_iterator == end_iterator; } }; /// Convenience function for iterating over sub-ranges. diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index cba8db49f0204..ef31c1e0ba8ce 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -632,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall( // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. 
- if (empty(MergeRange)) { + if (MergeRange.empty()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); return false; // No new cycle. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0e188fe1fd153..61a5445ff4114 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1054,7 +1054,7 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) { + if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { finishUnitAttributes(TheCU.getCUNode(), TheCU); TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, Asm->TM.Options.MCOptions.SplitDwarfFile); @@ -1106,7 +1106,7 @@ void DwarfDebug::finalizeModuleInfo() { // is a bit pessimistic under LTO. 
if (!AddrPool.isEmpty() && (getDwarfVersion() >= 5 || - (SkCU && !empty(TheCU.getUnitDie().children())))) + (SkCU && !TheCU.getUnitDie().children().empty()))) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2ad35b3a72c98..28143b30d4e8b 100644 --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index ebe3b7c640cf1..70045512fae51 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -433,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index e69dc136096ea..ddf4c9e2bb2ed 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -139,7 +139,7 @@ bool RegBankSelect::repairReg( "need new vreg for each breakdown"); // An empty range of new register means no repairing. 
- assert(!empty(NewVRegs) && "We should not have to repair"); + assert(!NewVRegs.empty() && "We should not have to repair"); MachineInstr *MI; if (ValMapping.NumBreakDowns == 1) { diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 82eaa88abc78d..3fcc55286bebb 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -455,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (empty(NewRegs)) { + if (NewRegs.empty()) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 50a9251780e69..e0b4e9cac229e 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -346,7 +346,7 @@ char MachineModuleInfoWrapperPass::ID = 0; bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { MMI.initialize(); MMI.TheModule = &M; - MMI.DbgInfoAvailable = !empty(M.debug_compile_units()); + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); return false; } @@ -361,6 +361,6 @@ MachineModuleInfo MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) { MachineModuleInfo MMI(TM); MMI.TheModule = &M; - MMI.DbgInfoAvailable = !empty(M.debug_compile_units()); + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); return MMI; } diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index c20d7d1d0faf0..4a886ac0597c1 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -88,7 +88,7 @@ iterator_range getDestructors(const Module &M) { } void CtorDtorRunner::add(iterator_range CtorDtors) { - if 
(empty(CtorDtors)) + if (CtorDtors.empty()) return; MangleAndInterner Mangle( diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 8f5101a4ae72f..1bbe6b85d2600 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -279,7 +279,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { } static MDNode *stripDebugLocFromLoopID(MDNode *N) { - assert(!empty(N->operands()) && "Missing self reference?"); + assert(!N->operands().empty() && "Missing self reference?"); // if there is no debug location, we do not have to rewrite this MDNode. if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 36bb2aae0c557..d19874d434e8e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1588,7 +1588,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (DstTy != LLT::vector(2, 16)) break; - assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0))); + assert(MI.getNumOperands() == 3 && OpdMapper.getVRegs(0).empty()); substituteSimpleCopyRegs(OpdMapper, 1); substituteSimpleCopyRegs(OpdMapper, 2); @@ -1644,7 +1644,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_EXTRACT_VECTOR_ELT: { SmallVector DstRegs(OpdMapper.getVRegs(0)); - assert(empty(OpdMapper.getVRegs(1)) && empty(OpdMapper.getVRegs(2))); + assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty()); if (DstRegs.empty()) { applyDefaultMapping(OpdMapper); @@ -1708,9 +1708,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_INSERT_VECTOR_ELT: { SmallVector InsRegs(OpdMapper.getVRegs(2)); - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(1))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(1).empty()); + assert(OpdMapper.getVRegs(3).empty()); if 
(InsRegs.empty()) { applyDefaultMapping(OpdMapper); @@ -1785,8 +1785,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_readlane: { substituteSimpleCopyRegs(OpdMapper, 2); - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(3).empty()); // Make sure the index is an SGPR. It doesn't make sense to run this in a // waterfall loop, so assume it's a uniform value. @@ -1794,9 +1794,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case Intrinsic::amdgcn_writelane: { - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(2))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(2).empty()); + assert(OpdMapper.getVRegs(3).empty()); substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val constrainOpWithReadfirstlane(MI, MRI, 2); // Source value @@ -1818,7 +1818,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: { // This is only allowed to execute with 1 lane, so readfirstlane is safe. - assert(empty(OpdMapper.getVRegs(0))); + assert(OpdMapper.getVRegs(0).empty()); substituteSimpleCopyRegs(OpdMapper, 3); constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 870300ab2b255..5a9a34e4af3c2 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -147,7 +147,7 @@ bool BPFAbstractMemberAccess::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); // Bail out if no debug info. 
- if (empty(M.debug_compile_units())) + if (M.debug_compile_units().empty()) return false; return doTransformation(M); diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp index e61e734680578..218b0302927c5 100644 --- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp @@ -59,7 +59,7 @@ bool BPFAsmPrinter::doInitialization(Module &M) { AsmPrinter::doInitialization(M); // Only emit BTF when debuginfo available. - if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) { + if (MAI->doesSupportDebugInformation() && !M.debug_compile_units().empty()) { BTF = new BTFDebug(this); Handlers.push_back(HandlerInfo(std::unique_ptr(BTF), "emit", "Debug Info Emission", "BTF", diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index dc013c8ff9a04..06533fe0de33b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2273,7 +2273,7 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, Register InUseReg = MI.getOperand(OpNo).getReg(); MI.getOperand(OpNo).ChangeToImmediate(Imm); - if (empty(MI.implicit_operands())) + if (MI.implicit_operands().empty()) return; // We need to make sure that the MI didn't have any implicit use diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 62f4584d5f58d..a0f0b6726cc2b 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1264,7 +1264,7 @@ std::pair PartialInlinerImpl::unswitchFunction(Function *F) { if (PSI->isFunctionEntryCold(F)) return {false, nullptr}; - if (empty(F->users())) + if (F->users().empty()) return {false, nullptr}; OptimizationRemarkEmitter ORE(F); @@ -1370,7 +1370,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { return false; } - assert(empty(Cloner.OrigFunc->users()) && + 
assert(Cloner.OrigFunc->users().empty() && "F's users should all be replaced!"); std::vector Users(Cloner.ClonedFunc->user_begin(), diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index a9fdfbaef3f4c..1aaa0265bade6 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2789,7 +2789,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // have already been removed; TODO: generalize BasicBlock *ExitBlock = BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); - if (!empty(ExitBlock->phis())) + if (!ExitBlock->phis().empty()) return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 91c879097afc2..c37da39b70b61 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1754,7 +1754,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, return true; }); // If we are left with no operands, it's dead. - if (empty(Filtered)) { + if (Filtered.empty()) { // If it has undef at this point, it means there are no-non-undef arguments, // and thus, the value of the phi node must be undef. if (HasUndef) { diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 3c288bab3779f..44859eafb9c19 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -556,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, if (isa(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." 
+ Twine(Counter++)); @@ -568,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, "Should not have gotten here without it being an assume"); IRBuilder<> B(PAssume->AssumeInst); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PredicateMap.insert({PIC, ValInfo}); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 279a844f9e444..008abad181a35 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5314,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Figure out the corresponding result for each case value and phi node in the // common destination, as well as the min and max case values. - assert(!empty(SI->cases())); + assert(!SI->cases().empty()); SwitchInst::CaseIt CI = SI->case_begin(); ConstantInt *MinCaseVal = CI->getCaseValue(); ConstantInt *MaxCaseVal = CI->getCaseValue(); From cdbeaf548f1ff1ac49dfd26de25a67c8ac081996 Mon Sep 17 00:00:00 2001 From: Michal Gorny Date: Mon, 7 Oct 2019 18:14:56 +0000 Subject: [PATCH 138/254] [clang] [cmake] Support LLVM_DISTRIBUTION_COMPONENTS in stand-alone build Differential Revision: https://reviews.llvm.org/D68412 llvm-svn: 373936 --- clang/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index aa21329365db4..45d2431a74628 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -114,6 +114,7 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) include(TableGen) include(HandleLLVMOptions) include(VersionFromVCS) + include(LLVMDistributionSupport) set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") @@ -858,6 +859,10 @@ if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION) endif() add_subdirectory(utils/hmaptool) +if(CLANG_BUILT_STANDALONE) + 
llvm_distribution_add_targets() +endif() + configure_file( ${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake ${CLANG_BINARY_DIR}/include/clang/Config/config.h) From 27269054d2df505f576eb3992d3f815c455ac7bb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 18:43:29 +0000 Subject: [PATCH 139/254] GlobalISel: Add target pre-isel instructions Allows targets to introduce regbankselectable pseudo-instructions. Currently the closest feature to this is an intrinsic. However this requires creating a public intrinsic declaration. This litters the public intrinsic namespace with operations we don't necessarily want to expose to IR producers, and would rather leave as private to the backend. Use a new instruction bit. A previous attempt tried to keep using enum value ranges, but it turned into a mess. llvm-svn: 373937 --- llvm/include/llvm/CodeGen/MachineInstr.h | 6 ++ llvm/include/llvm/MC/MCInstrDesc.h | 7 +- llvm/include/llvm/Target/GenericOpcodes.td | 4 +- llvm/include/llvm/Target/Target.td | 4 ++ llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 5 +- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 1 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 10 +++ .../inst-select-amdgpu-ffbh-u32.mir | 68 +++++++++++++++++++ .../regbankselect-amdgpu-ffbh-u32.mir | 32 +++++++++ llvm/utils/TableGen/CodeGenInstruction.cpp | 1 + llvm/utils/TableGen/CodeGenInstruction.h | 1 + llvm/utils/TableGen/InstrInfoEmitter.cpp | 1 + 15 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index d3ebe00c1c059..00c8ca767ad7d 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ 
b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -618,6 +618,12 @@ class MachineInstr return hasPropertyInBundle(1ULL << MCFlag, Type); } + /// Return true if this is an instruction that should go through the usual + /// legalization steps. + bool isPreISelOpcode(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::PreISelOpcode, Type); + } + /// Return true if this instruction can have a variable number of operands. /// In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index b119d90f6513f..e75a27614a22d 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -129,7 +129,8 @@ namespace MCID { /// not use these directly. These all correspond to bitfields in the /// MCInstrDesc::Flags field. enum Flag { - Variadic = 0, + PreISelOpcode = 0, + Variadic, HasOptionalDef, Pseudo, Return, @@ -242,6 +243,10 @@ class MCInstrDesc { /// Return flags of this instruction. uint64_t getFlags() const { return Flags; } + /// \returns true if this instruction is emitted before instruction selection + /// and should be legalized/regbankselected/selected. + bool isPreISelOpcode() const { return Flags & (1ULL << MCID::PreISelOpcode); } + /// Return true if this instruction can have a variable number of /// operands. In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index ad32b9fab75d3..4b49dfd4dd18e 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -15,7 +15,9 @@ // Unary ops. 
//------------------------------------------------------------------------------ -class GenericInstruction : StandardPseudoInstruction; +class GenericInstruction : StandardPseudoInstruction { + let isPreISelOpcode = 1; +} // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 93a6135928828..dd8679661b9ac 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -492,6 +492,10 @@ class Instruction : InstructionEncoding { // Added complexity passed onto matching pattern. int AddedComplexity = 0; + // Indicates if this is a pre-isel opcode that should be + // legalized/regbankselected/selected. + bit isPreISelOpcode = 0; + // These bits capture information about the high-level semantics of the // instruction. bit isReturn = 0; // Is this instruction a return instruction? diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index ddf4c9e2bb2ed..f0e35c65c53b8 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { // iterator before hand. MachineInstr &MI = *MII++; - // Ignore target-specific instructions: they should use proper regclasses. - if (isTargetSpecificOpcode(MI.getOpcode())) + // Ignore target-specific post-isel instructions: they should use proper + // regclasses. 
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; if (!assignInstr(MI)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 6ee11686f4859..f2be1ca44d346 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -116,6 +116,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; class GISelSop2Pat < SDPatternOperator node, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 4148d1d0b5523..056e104946157 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1650,7 +1650,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { if (I.isPHI()) return selectPHI(I); - if (!isPreISelGenericOpcode(I.getOpcode())) { + if (!I.isPreISelOpcode()) { if (I.isCopy()) return selectCOPY(I); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index d19874d434e8e..9446814c8f818 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2305,6 +2305,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FCANONICALIZE: case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: + case AMDGPU::G_AMDGPU_FFBH_U32: return getDefaultMappingVOP(MI); case AMDGPU::G_UMULH: case AMDGPU::G_SMULH: { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7a6bb0e20b795..1d486feb32c3b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3117,7 +3117,8 @@ static bool shouldReadExec(const MachineInstr &MI) { return true; } - if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || + if (MI.isPreISelOpcode() || + 
SIInstrInfo::isGenericOpcode(MI.getOpcode()) || SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI)) return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 15b9fce5341bf..6f310b4ebd1f0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1982,3 +1982,13 @@ def : FP16Med3Pat; defm : Int16Med3Pat; defm : Int16Med3Pat; } // End Predicates = [isGFX9Plus] + +class AMDGPUGenericInstruction : GenericInstruction { + let Namespace = "AMDGPU"; +} + +def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = 0; +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir new file mode 100644 index 0000000000000..cefd876daa82e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 %s -o - | FileCheck %s + +--- + +name: ffbh_u32_s32_s_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ffbh_u32_s32_s_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_AMDGPU_FFBH_U32 %0 + S_ENDPGM 0, implicit %1 + +... 
+ +--- + +name: ffbh_u32_s32_v_v +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ffbh_u32_s32_v_v + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_FFBH_U32_]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 + S_ENDPGM 0, implicit %1 + +... + +--- + +name: ffbh_u32_v_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ffbh_u32_v_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_FFBH_U32_]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 + S_ENDPGM 0, implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir new file mode 100644 index 0000000000000..a766df2a3d005 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-ffbh-u32.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: ffbh_u32_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ffbh_u32_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_AMDGPU_FFBH_U32 %0 +... 
+ +--- +name: ffbh_u32_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ffbh_u32_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_AMDGPU_FFBH_U32 %0 +... diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp index 2463824469abf..fde946d065891 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.cpp +++ b/llvm/utils/TableGen/CodeGenInstruction.cpp @@ -363,6 +363,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) Namespace = R->getValueAsString("Namespace"); AsmString = R->getValueAsString("AsmString"); + isPreISelOpcode = R->getValueAsBit("isPreISelOpcode"); isReturn = R->getValueAsBit("isReturn"); isEHScopeReturn = R->getValueAsBit("isEHScopeReturn"); isBranch = R->getValueAsBit("isBranch"); diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h index bb5b1369649f5..2cb28425df7aa 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.h +++ b/llvm/utils/TableGen/CodeGenInstruction.h @@ -231,6 +231,7 @@ template class ArrayRef; std::vector ImplicitDefs, ImplicitUses; // Various boolean values we track for the instruction. + bool isPreISelOpcode : 1; bool isReturn : 1; bool isEHScopeReturn : 1; bool isBranch : 1; diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index b7961efbf9636..300ba36a70074 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -662,6 +662,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, CodeGenTarget &Target = CDP.getTargetInfo(); // Emit all of the target independent flags... 
+ if (Inst.isPreISelOpcode) OS << "|(1ULL< Date: Mon, 7 Oct 2019 18:43:31 +0000 Subject: [PATCH 140/254] AMDGPU/GlobalISel: Select more G_INSERT cases At minimum handle the s64 insert type, which are emitted in real cases during legalization. We really need TableGen to emit something to emit something like the inverse of composeSubRegIndices do determine the subreg index to use. llvm-svn: 373938 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 98 +++- .../AMDGPU/GlobalISel/inst-select-insert.mir | 447 +++++++++++++++++- 2 files changed, 503 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 056e104946157..28ebbd9101c5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -555,39 +555,97 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { return false; } +// FIXME: TableGen should generate something to make this manageable for all +// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. 
+static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI, + unsigned Size, unsigned Offset) { + switch (Size) { + case 32: + return TRI.getSubRegFromChannel(Offset / 32); + case 64: { + switch (Offset) { + case 0: + return AMDGPU::sub0_sub1; + case 32: + return AMDGPU::sub1_sub2; + case 64: + return AMDGPU::sub2_sub3; + case 96: + return AMDGPU::sub4_sub5; + case 128: + return AMDGPU::sub5_sub6; + case 160: + return AMDGPU::sub7_sub8; + // FIXME: Missing cases up to 1024 bits + default: + return AMDGPU::NoSubRegister; + } + } + case 96: { + switch (Offset) { + case 0: + return AMDGPU::sub0_sub1_sub2; + case 32: + return AMDGPU::sub1_sub2_sub3; + case 64: + return AMDGPU::sub2_sub3_sub4; + } + } + default: + return AMDGPU::NoSubRegister; + } +} + bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); + + Register DstReg = I.getOperand(0).getReg(); Register Src0Reg = I.getOperand(1).getReg(); Register Src1Reg = I.getOperand(2).getReg(); LLT Src1Ty = MRI->getType(Src1Reg); - if (Src1Ty.getSizeInBits() != 32) - return false; + + unsigned DstSize = MRI->getType(DstReg).getSizeInBits(); + unsigned InsSize = Src1Ty.getSizeInBits(); int64_t Offset = I.getOperand(3).getImm(); if (Offset % 32 != 0) return false; - unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); - const DebugLoc &DL = I.getDebugLoc(); + unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset); + if (SubReg == AMDGPU::NoSubRegister) + return false; + + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + if (!DstRC) + return false; - MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) - .addDef(I.getOperand(0).getReg()) - .addReg(Src0Reg) - .addReg(Src1Reg) - .addImm(SubReg); + const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI); + const RegisterBank *Src1Bank = 
RBI.getRegBank(Src1Reg, *MRI, TRI); + const TargetRegisterClass *Src0RC = + TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI); + const TargetRegisterClass *Src1RC = + TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI); + + // Deal with weird cases where the class only partially supports the subreg + // index. + Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg); + if (!Src0RC) + return false; - for (const MachineOperand &MO : Ins->operands()) { - if (!MO.isReg()) - continue; - if (Register::isPhysicalRegister(MO.getReg())) - continue; + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) || + !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI)) + return false; + + const DebugLoc &DL = I.getDebugLoc(); + BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg) + .addReg(Src0Reg) + .addReg(Src1Reg) + .addImm(SubReg); - const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, *MRI); - if (!RC) - continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); - } I.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 93e35ead4d49a..3cd1b463b5790 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -1,32 +1,35 @@ -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + --- -name: insert512 +name: insert_s512_s32 legalized: true regBankSelected: true -# CHECK-LABEL: insert512 -# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF -# CHECK: [[VAL:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF -# CHECK: [[BASE0:%[0-9]+]]:sreg_512 = INSERT_SUBREG 
[[BASE]], [[VAL]], %subreg.sub0 -# CHECK: [[BASE1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE0]], [[VAL]], %subreg.sub1 -# CHECK: [[BASE2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE1]], [[VAL]], %subreg.sub2 -# CHECK: [[BASE3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE2]], [[VAL]], %subreg.sub3 -# CHECK: [[BASE4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE3]], [[VAL]], %subreg.sub4 -# CHECK: [[BASE5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE4]], [[VAL]], %subreg.sub5 -# CHECK: [[BASE6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE5]], [[VAL]], %subreg.sub6 -# CHECK: [[BASE7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE6]], [[VAL]], %subreg.sub7 -# CHECK: [[BASE8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE7]], [[VAL]], %subreg.sub8 -# CHECK: [[BASE9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE8]], [[VAL]], %subreg.sub9 -# CHECK: [[BASE10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE9]], [[VAL]], %subreg.sub10 -# CHECK: [[BASE11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE10]], [[VAL]], %subreg.sub11 -# CHECK: [[BASE12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE11]], [[VAL]], %subreg.sub12 -# CHECK: [[BASE13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE12]], [[VAL]], %subreg.sub13 -# CHECK: [[BASE14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE13]], [[VAL]], %subreg.sub14 -# CHECK: [[BASE15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE14]], [[VAL]], %subreg.sub15 - body: | bb.0: + ; CHECK-LABEL: name: insert_s512_s32 + ; CHECK: [[DEF:%[0-9]+]]:sreg_512 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[DEF]], [[DEF1]], %subreg.sub0 + ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG]], [[DEF1]], %subreg.sub1 + ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG1]], [[DEF1]], %subreg.sub2 + ; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG2]], [[DEF1]], %subreg.sub3 + ; CHECK: [[INSERT_SUBREG4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG3]], 
[[DEF1]], %subreg.sub4 + ; CHECK: [[INSERT_SUBREG5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG4]], [[DEF1]], %subreg.sub5 + ; CHECK: [[INSERT_SUBREG6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG5]], [[DEF1]], %subreg.sub6 + ; CHECK: [[INSERT_SUBREG7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG6]], [[DEF1]], %subreg.sub7 + ; CHECK: [[INSERT_SUBREG8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG7]], [[DEF1]], %subreg.sub8 + ; CHECK: [[INSERT_SUBREG9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG8]], [[DEF1]], %subreg.sub9 + ; CHECK: [[INSERT_SUBREG10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG9]], [[DEF1]], %subreg.sub10 + ; CHECK: [[INSERT_SUBREG11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG10]], [[DEF1]], %subreg.sub11 + ; CHECK: [[INSERT_SUBREG12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG11]], [[DEF1]], %subreg.sub12 + ; CHECK: [[INSERT_SUBREG13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13 + ; CHECK: [[INSERT_SUBREG14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14 + ; CHECK: [[INSERT_SUBREG15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]] + ; CHECK: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %0:sgpr(s512) = G_IMPLICIT_DEF %1:sgpr(s32) = G_IMPLICIT_DEF %2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0 @@ -47,3 +50,403 @@ body: | %17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512) SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + +--- + +name: insert_v_s64_v_s32_0 +legalized: true +regBankSelected: true 
+ +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s64_v_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: insert_v_s64_v_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s64_s_s32_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: insert_s_s64_s_s32_0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s64_s_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: insert_s_s64_s_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_s_s64_v_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; CHECK-LABEL: name: insert_s_s64_v_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s64_s_s32_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr0 + ; CHECK-LABEL: name: insert_v_s64_s_s32_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s64) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s96_v_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 + ; CHECK-LABEL: name: insert_v_s96_v_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(s64) = COPY $vgpr3_vgpr4 + %2:vgpr(s96) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_v_s96_v_s64_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4 + ; CHECK-LABEL: name: insert_v_s96_v_s64_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:vgpr(s64) = COPY $vgpr3_vgpr4 + %2:vgpr(s96) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s96_s_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s96_s_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s96) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s96_s_s64_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s96_s_s64_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s96) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_s_s128_s_s64_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s128_s_s64_0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s128) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +# --- + +# name: insert_s_s128_s_s64_32 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 +# %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s128) = G_INSERT %0, %1, 32 +# S_ENDPGM 0, implicit %2 +# ... + +--- + +name: insert_s_s128_s_s64_64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-LABEL: name: insert_s_s128_s_s64_64 + ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s128) = G_INSERT %0, %1, 64 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_s_s256_s_s64_96 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK-LABEL: name: insert_s_s256_s_s64_96 + ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s64) = COPY $sgpr8_sgpr9 + %2:sgpr(s256) = G_INSERT %0, %1, 96 + S_ENDPGM 0, implicit %2 +... + +# --- + +# name: insert_s_s256_s_s64_128 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 +# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s256) = G_INSERT %0, %1, 128 +# S_ENDPGM 0, implicit %2 +# ... + +# --- + +# name: insert_s_s256_s_s64_160 +# legalized: true +# regBankSelected: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 +# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 +# %1:sgpr(s64) = COPY $sgpr4_sgpr5 +# %2:sgpr(s256) = G_INSERT %0, %1, 160 +# S_ENDPGM 0, implicit %2 +# ... 
+ +--- + +name: insert_s_s128_s_s96_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s128_s_s96_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s128) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s128_s_s96_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s128_s_s96_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s128) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_s_s160_s_s96_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s160_s_s96_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_s_s160_s_s96_64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8 + ; CHECK-LABEL: name: insert_s_s160_s_s96_64 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 + %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 + %2:sgpr(s160) = G_INSERT %0, %1, 64 + S_ENDPGM 0, implicit %2 +... From bef93a98cd26012049b8e64bf27134885fcf9550 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 7 Oct 2019 18:54:57 +0000 Subject: [PATCH 141/254] [OPENMP50]Treat range-based for as canonical loop. According to OpenMP 5.0, range-based for is also considered as a canonical form of loops. llvm-svn: 373939 --- clang/include/clang/AST/StmtOpenMP.h | 17 +- clang/include/clang/Sema/Sema.h | 4 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 33 ++++ clang/lib/Parse/ParseDecl.cpp | 2 + clang/lib/Sema/SemaOpenMP.cpp | 49 ++++-- clang/lib/Sema/SemaStmt.cpp | 5 + ...ribute_parallel_for_simd_loop_messages.cpp | 2 +- .../OpenMP/distribute_simd_loop_messages.cpp | 2 +- clang/test/OpenMP/for_ast_print.cpp | 26 +-- clang/test/OpenMP/for_loop_messages.cpp | 2 +- clang/test/OpenMP/for_simd_loop_messages.cpp | 2 +- clang/test/OpenMP/parallel_for_codegen.cpp | 159 ++++++++++++++++++ .../OpenMP/parallel_for_loop_messages.cpp | 2 +- .../parallel_for_simd_loop_messages.cpp | 2 +- clang/test/OpenMP/simd_loop_messages.cpp | 2 +- .../target_parallel_for_loop_messages.cpp | 2 +- ...target_parallel_for_simd_loop_messages.cpp | 2 +- .../test/OpenMP/target_simd_loop_messages.cpp | 2 +- .../target_teams_distribute_loop_messages.cpp | 2 +- ..._distribute_parallel_for_loop_messages.cpp | 2 +- 
...ribute_parallel_for_simd_loop_messages.cpp | 2 +- clang/test/OpenMP/taskloop_loop_messages.cpp | 2 +- .../OpenMP/taskloop_simd_loop_messages.cpp | 2 +- .../OpenMP/teams_distribute_loop_messages.cpp | 2 +- ..._distribute_parallel_for_loop_messages.cpp | 2 +- ...ribute_parallel_for_simd_loop_messages.cpp | 2 +- .../teams_distribute_simd_loop_messages.cpp | 2 +- 27 files changed, 291 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index c9efe32387159..ef69158d61f94 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -17,6 +17,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" @@ -1087,10 +1088,22 @@ class OMPLoopDirective : public OMPExecutableDirective { // This relies on the loop form is already checked by Sema. const Stmt *Body = getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); - Body = cast(Body)->getBody(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected caonical for loop or range-based for loop."); + Body = cast(Body)->getBody(); + } for (unsigned Cnt = 1; Cnt < CollapsedNum; ++Cnt) { Body = Body->IgnoreContainers(); - Body = cast(Body)->getBody(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected caonical for loop or range-based for loop."); + Body = cast(Body)->getBody(); + } } return Body; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 402ee4f858af2..a77cbc09e400e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9154,6 +9154,10 @@ class Sema { /// construct. void startOpenMPLoop(); + /// If the current region is a range loop-based region, mark the start of the + /// loop construct. 
+ void startOpenMPCXXRangeFor(); + /// Check if the specified variable is used in 'private' clause. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 1ab6a8d963be5..c70244d779405 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -142,6 +142,24 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } } (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. + const Stmt *Body = + S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { + Body = Body->IgnoreContainers(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected caonical for loop or range-based for loop."); + auto *CXXFor = cast(Body); + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + Body = CXXFor->getBody(); + } + } if (const auto *PreInits = cast_or_null(S.getPreInits())) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast(*I)); @@ -1350,6 +1368,21 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, getProfileCount(D.getBody())); EmitBlock(NextBB); } + // Emit loop variables for C++ range loops. + const Stmt *Body = + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < D.getCollapsedNumber(); ++Cnt) { + Body = Body->IgnoreContainers(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected caonical for loop or range-based for loop."); + auto *CXXFor = cast(Body); + EmitStmt(CXXFor->getLoopVarStmt()); + Body = CXXFor->getBody(); + } + } // Emit loop body. EmitStmt(D.getBody()); // The end (updates/cleanups). 
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 942c520a0b50f..1ef8b984e4dbf 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2100,6 +2100,8 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, bool IsForRangeLoop = false; if (TryConsumeToken(tok::colon, FRI->ColonLoc)) { IsForRangeLoop = true; + if (getLangOpts().OpenMP) + Actions.startOpenMPCXXRangeFor(); if (Tok.is(tok::l_brace)) FRI->RangeExpr = ParseBraceInitializer(); else diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7ec5ba335fd7b..b0fd6aa5fd5fe 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2008,6 +2008,14 @@ void Sema::startOpenMPLoop() { DSAStack->loopInit(); } +void Sema::startOpenMPCXXRangeFor() { + assert(LangOpts.OpenMP && "OpenMP must be enabled."); + if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { + DSAStack->resetPossibleLoopCounter(); + DSAStack->loopStart(); + } +} + bool Sema::isOpenMPPrivateDecl(const ValueDecl *D, unsigned Level) const { assert(LangOpts.OpenMP && "OpenMP is not allowed"); if (isOpenMPLoopDirective(DSAStack->getCurrentDirective())) { @@ -6490,10 +6498,13 @@ static bool checkOpenMPIterationSpace( Sema::VarsWithInheritedDSAType &VarsWithImplicitDSA, llvm::MutableArrayRef ResultIterSpaces, llvm::MapVector &Captures) { - // OpenMP [2.6, Canonical Loop Form] + // OpenMP [2.9.1, Canonical Loop Form] // for (init-expr; test-expr; incr-expr) structured-block + // for (range-decl: range-expr) structured-block auto *For = dyn_cast_or_null(S); - if (!For) { + auto *CXXFor = dyn_cast_or_null(S); + // Ranged for is supported only in OpenMP 5.0. 
+ if (!For && (SemaRef.LangOpts.OpenMP <= 45 || !CXXFor)) { SemaRef.Diag(S->getBeginLoc(), diag::err_omp_not_for) << (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr) << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount @@ -6515,12 +6526,14 @@ static bool checkOpenMPIterationSpace( } return true; } - assert(For->getBody()); + assert(((For && For->getBody()) || (CXXFor && CXXFor->getBody())) && + "No loop body."); - OpenMPIterationSpaceChecker ISC(SemaRef, DSA, For->getForLoc()); + OpenMPIterationSpaceChecker ISC(SemaRef, DSA, + For ? For->getForLoc() : CXXFor->getForLoc()); // Check init. - Stmt *Init = For->getInit(); + Stmt *Init = For ? For->getInit() : CXXFor->getBeginStmt(); if (ISC.checkAndSetInit(Init)) return true; @@ -6556,18 +6569,18 @@ static bool checkOpenMPIterationSpace( assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars"); // Check test-expr. - HasErrors |= ISC.checkAndSetCond(For->getCond()); + HasErrors |= ISC.checkAndSetCond(For ? For->getCond() : CXXFor->getCond()); // Check incr-expr. - HasErrors |= ISC.checkAndSetInc(For->getInc()); + HasErrors |= ISC.checkAndSetInc(For ? For->getInc() : CXXFor->getInc()); } if (ISC.dependent() || SemaRef.CurContext->isDependentContext() || HasErrors) return HasErrors; // Build the loop's iteration space representation. - ResultIterSpaces[CurrentNestedLoopCount].PreCond = - ISC.buildPreCond(DSA.getCurScope(), For->getCond(), Captures); + ResultIterSpaces[CurrentNestedLoopCount].PreCond = ISC.buildPreCond( + DSA.getCurScope(), For ? 
For->getCond() : CXXFor->getCond(), Captures); ResultIterSpaces[CurrentNestedLoopCount].NumIterations = ISC.buildNumIterations(DSA.getCurScope(), ResultIterSpaces, (isOpenMPWorksharingDirective(DKind) || @@ -6881,7 +6894,14 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // All loops associated with the construct must be perfectly nested; that // is, there must be no intervening code nor any OpenMP directive between // any two loops. - CurStmt = cast(CurStmt)->getBody()->IgnoreContainers(); + if (auto *For = dyn_cast(CurStmt)) { + CurStmt = For->getBody(); + } else { + assert(isa(CurStmt) && + "Expected canonical for or range-based for loops."); + CurStmt = cast(CurStmt)->getBody(); + } + CurStmt = CurStmt->IgnoreContainers(); } for (unsigned Cnt = NestedLoopCount; Cnt < OrderedLoopCount; ++Cnt) { if (checkOpenMPIterationSpace( @@ -6901,7 +6921,14 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // All loops associated with the construct must be perfectly nested; that // is, there must be no intervening code nor any OpenMP directive between // any two loops. - CurStmt = cast(CurStmt)->getBody()->IgnoreContainers(); + if (auto *For = dyn_cast(CurStmt)) { + CurStmt = For->getBody(); + } else { + assert(isa(CurStmt) && + "Expected canonical for or range-based for loops."); + CurStmt = cast(CurStmt)->getBody(); + } + CurStmt = CurStmt->IgnoreContainers(); } Built.clear(/* size */ NestedLoopCount); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index cff20aab03081..6c680f29da4f9 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -2674,6 +2674,11 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, if (Kind == BFRK_Check) return StmtResult(); + // In OpenMP loop region loop control variable must be private. Perform + // analysis of first part (if any). 
+ if (getLangOpts().OpenMP >= 50 && BeginDeclStmt.isUsable()) + ActOnOpenMPLoopInitialization(ForLoc, BeginDeclStmt.get()); + return new (Context) CXXForRangeStmt( InitStmt, RangeDS, cast_or_null(BeginDeclStmt.get()), cast_or_null(EndDeclStmt.get()), NotEqExpr.get(), diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp index 64c1b9130bcfe..8fd035262ecf3 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp @@ -379,7 +379,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams -// expected-error@+2 {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} #pragma omp distribute parallel for simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/distribute_simd_loop_messages.cpp b/clang/test/OpenMP/distribute_simd_loop_messages.cpp index d4869f1c79af5..423e5f1b1166a 100644 --- a/clang/test/OpenMP/distribute_simd_loop_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_loop_messages.cpp @@ -376,7 +376,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams - // expected-error@+2 {{statement after '#pragma omp distribute simd' must be a for loop}} + // omp4-error@+2 {{statement after '#pragma omp distribute simd' must be a for loop}} #pragma omp distribute simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/for_ast_print.cpp b/clang/test/OpenMP/for_ast_print.cpp index cdbe9aaffdd50..edb4b9a7ac0ef 100644 --- a/clang/test/OpenMP/for_ast_print.cpp +++ b/clang/test/OpenMP/for_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t 
-fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -103,6 +103,7 @@ class S8 : public S7 { template T tmain(T argc) { T b = argc, c, d, e, f, g; + T arr[N]; static T a; // CHECK: static T a; #pragma omp for schedule(dynamic) linear(a) allocate(a) @@ -113,6 +114,7 @@ T tmain(T argc) { // CHECK-NEXT: a = 2; #pragma omp parallel #pragma omp for allocate(argc) private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) ordered(N) nowait + for (auto &x : arr) for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) @@ -126,6 +128,7 @@ T tmain(T argc) { foo(); // CHECK-NEXT: #pragma omp parallel // CHECK-NEXT: #pragma omp for allocate(argc) private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) ordered(N) nowait + // CHECK-NEXT: for (auto &x : arr) // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: for (int j = 0; j < 2; ++j) // CHECK-NEXT: for (int j = 0; j < 2; ++j) @@ -143,6 +146,7 @@ T tmain(T argc) { int main(int argc, char **argv) { // CHECK: int main(int argc, char **argv) { int b 
= argc, c, d, e, f, g; + float arr[20]; static int a; // CHECK: static int a; #pragma omp for schedule(guided, argc) reduction(+:argv[0][:1]) @@ -152,15 +156,17 @@ int main(int argc, char **argv) { // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: a = 2; #pragma omp parallel -#pragma omp for private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) schedule(auto) ordered nowait linear(g:-1) +#pragma omp for private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(3) schedule(auto) ordered nowait linear(g:-1) for (int i = 0; i < 10; ++i) for (int j = 0; j < 10; ++j) - foo(); + for (auto x : arr) + foo(), (void)x; // CHECK-NEXT: #pragma omp parallel - // CHECK-NEXT: #pragma omp for private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) schedule(auto) ordered nowait linear(g: -1) + // CHECK-NEXT: #pragma omp for private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(3) schedule(auto) ordered nowait linear(g: -1) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: for (int j = 0; j < 10; ++j) - // CHECK-NEXT: foo(); + // CHECK-NEXT: for (auto x : arr) + // CHECK-NEXT: foo() , (void)x; char buf[9] = "01234567"; char *p, *q; #pragma omp parallel diff --git a/clang/test/OpenMP/for_loop_messages.cpp b/clang/test/OpenMP/for_loop_messages.cpp index 36727070eded5..73c69ede6d120 100644 --- a/clang/test/OpenMP/for_loop_messages.cpp +++ b/clang/test/OpenMP/for_loop_messages.cpp @@ -370,7 +370,7 @@ int test_iteration_spaces() { } #pragma omp parallel -// expected-error@+2 {{statement after '#pragma omp for' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp for' must be a for loop}} #pragma omp for for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/for_simd_loop_messages.cpp b/clang/test/OpenMP/for_simd_loop_messages.cpp index 4b62bcad03e83..99f30a64eb5e3 100644 --- a/clang/test/OpenMP/for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/for_simd_loop_messages.cpp @@ -333,7 +333,7 @@ 
int test_iteration_spaces() { } #pragma omp parallel -// expected-error@+2 {{statement after '#pragma omp for simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp for simd' must be a for loop}} #pragma omp for simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 9e3390214d78d..ffa715548a85a 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -469,6 +469,165 @@ int decrement_nowait () { return 0; // OMP5: ret i32 0 } + +// OMP5-LABEL: range_for_single +void range_for_single() { + int arr[10] = {0}; +// OMP5: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), [10 x i32]* %{{.+}}) +#pragma omp parallel for + for (auto &a : arr) + (void)a; +} + +// OMP5: define internal void @.omp_outlined.(i32* {{.+}}, i32* {{.+}}, [10 x i32]* dereferenceable(40) %arr) +// OMP5: [[ARR_ADDR:%.+]] = alloca [10 x i32]*, +// OMP5: [[IV:%.+]] = alloca i64, +// OMP5: [[RANGE_ADDR:%.+]] = alloca [10 x i32]*, +// OMP5: [[END_ADDR:%.+]] = alloca i32*, +// OMP5: alloca i32*, +// OMP5: alloca i32*, +// OMP5: alloca i64, +// OMP5: [[BEGIN_INIT:%.+]] = alloca i32*, +// OMP5: [[LB:%.+]] = alloca i64, +// OMP5: [[UB:%.+]] = alloca i64, +// OMP5: [[STRIDE:%.+]] = alloca i64, +// OMP5: [[IS_LAST:%.+]] = alloca i32, +// OMP5: [[BEGIN:%.+]] = alloca i32*, +// OMP5: [[A_PTR:%.+]] = alloca i32*, +// OMP5: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( + +// __range = arr; +// OMP5: [[ARR:%.+]] = load [10 x i32]*, [10 x i32]** [[ARR_ADDR]], +// OMP5: store [10 x i32]* [[ARR]], [10 x i32]** [[RANGE_ADDR]], + +// __end = end(_range); +// OMP5: [[RANGE:%.+]] = load [10 x i32]*, [10 x i32]** [[RANGE_ADDR]], +// OMP5: [[RANGE_0:%.+]] = getelementptr inbounds [10 
x i32], [10 x i32]* [[RANGE]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// OMP5: [[RANGE_10:%.+]] = getelementptr inbounds i32, i32* [[RANGE_0]], i{{[0-9]+}} 10 +// OMP5: store i32* [[RANGE_10]], i32** [[END_ADDR]], + +// OMP5: [[RANGE:%.+]] = load [10 x i32]*, [10 x i32]** [[RANGE_ADDR]], +// OMP5: [[RANGE_0:%.+]] = getelementptr inbounds [10 x i32], [10 x i32]* [[RANGE]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// OMP5: store i32* [[RANGE_0]], i32** [[CAP1:%.+]], +// OMP5: [[END:%.+]] = load i32*, i32** [[END_ADDR]], +// OMP5: store i32* [[END]], i32** [[CAP2:%.+]], + +// calculate number of elements. +// OMP5: [[CAP2_VAL:%.+]] = load i32*, i32** [[CAP2]], +// OMP5: [[CAP1_VAL:%.+]] = load i32*, i32** [[CAP1]], +// OMP5: [[CAP2_I64:%.+]] = ptrtoint i32* [[CAP2_VAL]] to i64 +// OMP5: [[CAP1_I64:%.+]] = ptrtoint i32* [[CAP1_VAL]] to i64 +// OMP5: [[DIFF:%.+]] = sub i64 [[CAP2_I64]], [[CAP1_I64]] +// OMP5: [[NUM:%.+]] = sdiv exact i64 [[DIFF]], 4 +// OMP5: [[NUM1:%.+]] = sub nsw i64 [[NUM]], 1 +// OMP5: [[NUM2:%.+]] = add nsw i64 [[NUM1]], 1 +// OMP5: [[NUM3:%.+]] = sdiv i64 [[NUM2]], 1 +// OMP5: [[NUM4:%.+]] = sub nsw i64 [[NUM3]], 1 +// OMP5: store i64 [[NUM4]], i64* [[CAP3:%.+]], +// OMP5: [[RANGE_0:%.+]] = load i32*, i32** [[CAP1]], + +// __begin = begin(range); +// OMP5: store i32* [[RANGE_0]], i32** [[BEGIN_INIT]], +// OMP5: [[CAP1_VAL:%.+]] = load i32*, i32** [[CAP1]], +// OMP5: [[CAP2_VAL:%.+]] = load i32*, i32** [[CAP2]], +// OMP5: [[CMP:%.+]] = icmp ult i32* [[CAP1_VAL]], [[CAP2_VAL]] + +// __begin >= __end ? goto then : goto exit; +// OMP5: br i1 [[CMP]], label %[[THEN:.+]], label %[[EXIT:.+]] + +// OMP5: [[THEN]]: + +// lb = 0; +// OMP5: store i64 0, i64* [[LB]], + +// ub = number of elements +// OMP5: [[NUM:%.+]] = load i64, i64* [[CAP3]], +// OMP5: store i64 [[NUM]], i64* [[UB]], + +// stride = 1; +// OMP5: store i64 1, i64* [[STRIDE]], + +// is_last = 0; +// OMP5: store i32 0, i32* [[IS_LAST]], + +// loop. 
+// OMP5: call void @__kmpc_for_static_init_8(%struct.ident_t* {{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST]], i64* [[LB]], i64* [[UB]], i64* [[STRIDE]], i64 1, i64 1) + +// ub = (ub > number_of_elems ? number_of_elems : ub); +// OMP5: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP5: [[NUM_VAL:%.+]] = load i64, i64* [[CAP3]], +// OMP5: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], [[NUM_VAL]] +// OMP5: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:.+]] + +// OMP5: [[TRUE]]: +// OMP5: [[NUM_VAL:%.+]] = load i64, i64* [[CAP3]], +// OMP5: br label %[[END:.+]] + +// OMP5: [[FALSE]]: +// OMP5: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP5: br label %[[END:.+]] + +// OMP5: [[END]]: +// OMP5: [[MIN:%.+]] = phi i64 [ [[NUM_VAL]], %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP%: store i64 [[MIN]], i64* [[UB]], + +// iv = lb; +// OMP5: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP5: store i64 [[LB_VAL]], i64* [[IV]], + +// goto loop; +// loop: +// OMP5: br label %[[LOOP:.+]] + +// OMP5: [[LOOP]]: + +// iv <= ub ? 
goto body : goto end; +// OMP5: [[IV_VAL:%.+]] = load i64, i64* [[IV]], +// OMP5: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP5: [[CMP:%.+]] = icmp sle i64 [[IV_VAL]], [[UB_VAL]] +// OMP5: br i1 [[CMP]], label %[[BODY:.+]], label %[[END:.+]] + +// body: +// __begin = begin(arr) + iv * 1; +// OMP5: [[BODY]]: +// OMP5: [[CAP1_VAL:%.+]] = load i32*, i32** [[CAP1]], +// OMP5: [[IV_VAL:%.+]] = load i64, i64* [[IV]], +// OMP5: [[MUL:%.+]] = mul nsw i64 [[IV_VAL]], 1 +// OMP5: [[ADDR:%.+]] = getelementptr inbounds i32, i32* [[CAP1_VAL]], i64 [[MUL]] +// OMP5: store i32* [[ADDR]], i32** [[BEGIN]], + +// a = *__begin; +// OMP5: [[BEGIN_VAL:%.+]] = load i32*, i32** [[BEGIN]], +// OMP5: store i32* [[BEGIN_VAL]], i32** [[A_PTR]], + +// (void)a; +// OMP5: load i32*, i32** [[A_PTR]], + +// iv += 1; +// OMP5: [[IV_VAL:%.+]] = load i64, i64* [[IV]], +// OMP5: [[IV_VAL_ADD_1:%.+]] = add nsw i64 [[IV_VAL]], 1 +// OMP5: store i64 [[IV_VAL_ADD_1]], i64* [[IV]], + +// goto loop; +// OMP5: br label %[[LOOP]] + +// end: +// OMP5: [[END]]: +// OMP5: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 [[GTID]]) +// exit: +// OMP5: [[EXIT]]: +// OMP5: ret void + +// OMP5-LABEL: range_for_collapsed +void range_for_collapsed() { + int arr[10] = {0}; +// OMP5: call void @__kmpc_for_static_init_8(%struct.ident_t* {{.+}}, i32 [[GTID%.+]], i32 34, i32* %{{.+}}, i64* %{{.+}}, i64* %{{.+}}, i64* %{{.+}}, i64 1, i64 1) +#pragma omp parallel for collapse(2) + for (auto &a : arr) + for (auto b : arr) + a = b; +// OMP5: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 [[GTID]]) +} #endif // OMP5 #endif // HEADER diff --git a/clang/test/OpenMP/parallel_for_loop_messages.cpp b/clang/test/OpenMP/parallel_for_loop_messages.cpp index c234b52899f06..beaf56e7bcb6d 100644 --- a/clang/test/OpenMP/parallel_for_loop_messages.cpp +++ b/clang/test/OpenMP/parallel_for_loop_messages.cpp @@ -281,7 +281,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// 
expected-error@+2 {{statement after '#pragma omp parallel for' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp parallel for' must be a for loop}} #pragma omp parallel for for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/parallel_for_simd_loop_messages.cpp b/clang/test/OpenMP/parallel_for_simd_loop_messages.cpp index 006e3e79a445f..5a0202c4e5766 100644 --- a/clang/test/OpenMP/parallel_for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/parallel_for_simd_loop_messages.cpp @@ -283,7 +283,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// expected-error@+2 {{statement after '#pragma omp parallel for simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp parallel for simd' must be a for loop}} #pragma omp parallel for simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/simd_loop_messages.cpp b/clang/test/OpenMP/simd_loop_messages.cpp index 701c52f1b9bb3..5a98be537615e 100644 --- a/clang/test/OpenMP/simd_loop_messages.cpp +++ b/clang/test/OpenMP/simd_loop_messages.cpp @@ -300,7 +300,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } - // expected-error@+2 {{statement after '#pragma omp simd' must be a for loop}} + // omp4-error@+2 {{statement after '#pragma omp simd' must be a for loop}} #pragma omp simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/target_parallel_for_loop_messages.cpp b/clang/test/OpenMP/target_parallel_for_loop_messages.cpp index 1aa32d543ec94..ec82d1c15e744 100644 --- a/clang/test/OpenMP/target_parallel_for_loop_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_loop_messages.cpp @@ -281,7 +281,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// expected-error@+2 {{statement after '#pragma omp target parallel for' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp target parallel for' must be a for loop}} #pragma omp target parallel for for (auto &item 
: a) { item = item + 1; diff --git a/clang/test/OpenMP/target_parallel_for_simd_loop_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_loop_messages.cpp index a17b13fe04465..3ec5f5d3817d8 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_loop_messages.cpp @@ -283,7 +283,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// expected-error@+2 {{statement after '#pragma omp target parallel for simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp target parallel for simd' must be a for loop}} #pragma omp target parallel for simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/target_simd_loop_messages.cpp b/clang/test/OpenMP/target_simd_loop_messages.cpp index 48fd7719eab8f..1e894efff8b23 100644 --- a/clang/test/OpenMP/target_simd_loop_messages.cpp +++ b/clang/test/OpenMP/target_simd_loop_messages.cpp @@ -283,7 +283,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// expected-error@+2 {{statement after '#pragma omp target simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp target simd' must be a for loop}} #pragma omp target simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/target_teams_distribute_loop_messages.cpp b/clang/test/OpenMP/target_teams_distribute_loop_messages.cpp index 53fb9cac57282..4f8ca8d94b3bf 100644 --- a/clang/test/OpenMP/target_teams_distribute_loop_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_loop_messages.cpp @@ -269,7 +269,7 @@ int test_iteration_spaces() { } #pragma omp target teams distribute -// expected-error@+1 {{statement after '#pragma omp target teams distribute' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp target teams distribute' must be a for loop}} for (auto &item : a) { item = item + 1; } diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp index f95211b905a77..9530acd2935f8 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp @@ -267,7 +267,7 @@ int test_iteration_spaces() { } #pragma omp target teams distribute parallel for -// expected-error@+1 {{statement after '#pragma omp target teams distribute parallel for' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp target teams distribute parallel for' must be a for loop}} for (auto &item : a) { item = item + 1; } diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp index 3ab59b30654c1..8435ef625c748 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp @@ -269,7 +269,7 @@ int test_iteration_spaces() { c[globalii] += a[globalii] + ii; } -// expected-error@+2 {{statement after '#pragma omp target teams distribute parallel for simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp target teams distribute parallel for simd' must be a for loop}} #pragma omp target teams distribute parallel for simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/taskloop_loop_messages.cpp b/clang/test/OpenMP/taskloop_loop_messages.cpp index 9a8b4aaea0411..1783a5265d9f5 100644 --- a/clang/test/OpenMP/taskloop_loop_messages.cpp +++ b/clang/test/OpenMP/taskloop_loop_messages.cpp @@ -350,7 +350,7 @@ int test_iteration_spaces() { } #pragma omp parallel -// expected-error@+2 {{statement after '#pragma omp taskloop' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp taskloop' must be a for loop}} #pragma omp taskloop for (auto &item : a) { item = item + 1; diff --git 
a/clang/test/OpenMP/taskloop_simd_loop_messages.cpp b/clang/test/OpenMP/taskloop_simd_loop_messages.cpp index 72c9ec4844984..9a345d6a6300c 100644 --- a/clang/test/OpenMP/taskloop_simd_loop_messages.cpp +++ b/clang/test/OpenMP/taskloop_simd_loop_messages.cpp @@ -352,7 +352,7 @@ int test_iteration_spaces() { } #pragma omp parallel -// expected-error@+2 {{statement after '#pragma omp taskloop simd' must be a for loop}} +// omp4-error@+2 {{statement after '#pragma omp taskloop simd' must be a for loop}} #pragma omp taskloop simd for (auto &item : a) { item = item + 1; diff --git a/clang/test/OpenMP/teams_distribute_loop_messages.cpp b/clang/test/OpenMP/teams_distribute_loop_messages.cpp index 44cf9170977fd..617d0353b9482 100644 --- a/clang/test/OpenMP/teams_distribute_loop_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_loop_messages.cpp @@ -317,7 +317,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams distribute -// expected-error@+1 {{statement after '#pragma omp teams distribute' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp teams distribute' must be a for loop}} for (auto &item : a) { item = item + 1; } diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp index d9b01c972f295..f5948027e56c3 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp @@ -315,7 +315,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams distribute parallel for -// expected-error@+1 {{statement after '#pragma omp teams distribute parallel for' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp teams distribute parallel for' must be a for loop}} for (auto &item : a) { item = item + 1; } diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp 
b/clang/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp index ba201e49ef7e0..4359aea70fa7a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp @@ -317,7 +317,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams distribute parallel for simd -// expected-error@+1 {{statement after '#pragma omp teams distribute parallel for simd' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp teams distribute parallel for simd' must be a for loop}} for (auto &item : a) { item = item + 1; } diff --git a/clang/test/OpenMP/teams_distribute_simd_loop_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_loop_messages.cpp index 6cb51839c2d98..b3020d1a610f5 100644 --- a/clang/test/OpenMP/teams_distribute_simd_loop_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_loop_messages.cpp @@ -317,7 +317,7 @@ int test_iteration_spaces() { #pragma omp target #pragma omp teams distribute simd -// expected-error@+1 {{statement after '#pragma omp teams distribute simd' must be a for loop}} +// omp4-error@+1 {{statement after '#pragma omp teams distribute simd' must be a for loop}} for (auto &item : a) { item = item + 1; } From d03068c3e1fbc8b8aa24af8e2a806fafa8a92e26 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 7 Oct 2019 18:55:15 +0000 Subject: [PATCH 142/254] [libc++abi] Do not define -Dcxxabi_shared_EXPORTS when building libc++abi CMake sets adds that definition automatically, but we don't need or use it. 
llvm-svn: 373940 --- libcxxabi/src/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 8d4eb795170ea..0bb164d12ee4e 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -176,7 +176,9 @@ if (LIBCXXABI_ENABLE_SHARED) SOVERSION "1" VERSION - "1.0") + "1.0" + DEFINE_SYMBOL + "") if(LIBCXXABI_ENABLE_PIC) set_target_properties(cxxabi_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) From 578fa2819fa64b60dfec66a3b970634c71a39841 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:05:58 +0000 Subject: [PATCH 143/254] AMDGPU/GlobalISel: Widen 16-bit G_MERGE_VALUEs sources Continue making a mess of merge/unmerge legality. llvm-svn: 373942 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 47 +- .../legalize-load-constant-32bit.mir | 8 +- .../GlobalISel/legalize-load-constant.mir | 3534 ++++++++++---- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 2620 +++++++--- .../GlobalISel/legalize-load-global.mir | 2856 +++++++---- .../AMDGPU/GlobalISel/legalize-load-local.mir | 3882 ++++++++++----- .../GlobalISel/legalize-load-private.mir | 4206 ++++++++++------- .../GlobalISel/legalize-merge-values.mir | 186 +- 8 files changed, 11868 insertions(+), 5471 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 037e2e88a77ba..ce724e360ae5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -988,7 +988,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - getActionDefinitionsBuilder(Op) + auto &Builder = getActionDefinitionsBuilder(Op) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) // Clamp the little scalar to s8-s256 and make it a power of 2. 
It's not // worth considering the multiples of 64 since 2*192 and 2*384 are not @@ -1007,25 +1007,36 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, scalarize(1)) .clampScalar(BigTyIdx, S32, S1024) - .lowerFor({{S16, V2S16}}) - .widenScalarIf( + .lowerFor({{S16, V2S16}}); + + if (Op == G_MERGE_VALUES) { + Builder.widenScalarIf( + // TODO: Use 16-bit shifts if legal for 8-bit values? [=](const LegalityQuery &Query) { - const LLT &Ty = Query.Types[BigTyIdx]; - return !isPowerOf2_32(Ty.getSizeInBits()) && - Ty.getSizeInBits() % 16 != 0; + const LLT Ty = Query.Types[LitTyIdx]; + return Ty.getSizeInBits() < 32; }, - [=](const LegalityQuery &Query) { - // Pick the next power of 2, or a multiple of 64 over 128. - // Whichever is smaller. - const LLT &Ty = Query.Types[BigTyIdx]; - unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); - if (NewSizeInBits >= 256) { - unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); - if (RoundedTo < NewSizeInBits) - NewSizeInBits = RoundedTo; - } - return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); - }) + changeTo(LitTyIdx, S32)); + } + + Builder.widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[BigTyIdx]; + return !isPowerOf2_32(Ty.getSizeInBits()) && + Ty.getSizeInBits() % 16 != 0; + }, + [=](const LegalityQuery &Query) { + // Pick the next power of 2, or a multiple of 64 over 128. + // Whichever is smaller. 
+ const LLT &Ty = Query.Types[BigTyIdx]; + unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); + if (NewSizeInBits >= 256) { + unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); + if (RoundedTo < NewSizeInBits) + NewSizeInBits = RoundedTo; + } + return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); + }) .legalIf([=](const LegalityQuery &Query) { const LLT &BigTy = Query.Types[BigTyIdx]; const LLT &LitTy = Query.Types[LitTyIdx]; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index 2795e75da9846..bcdbecd28a17a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -39,8 +39,12 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV1]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: $vgpr0 = COPY [[OR2]](s32) %0:_(p6) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 6) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index d1c62196c7d6b..5bf60e2ce82c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -383,53 +383,78 @@ body: | ; CI-LABEL: name: test_load_constant_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_constant_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: $vgpr0 = 
COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_constant_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: $vgpr0 = COPY [[OR]](s32) ; CI-MESA-LABEL: name: test_load_constant_s32_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: $vgpr0 = COPY [[OR]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_s32_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -471,8 +496,12 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_constant_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -499,8 +528,12 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_constant_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -527,8 +560,12 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: $vgpr0 = COPY [[OR2]](s32) ; CI-MESA-LABEL: name: test_load_constant_s32_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, 
addrspace 4) @@ -559,8 +596,12 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_s32_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -587,8 +628,12 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -712,92 +757,142 @@ body: | ; CI-LABEL: name: test_load_constant_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: 
[[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_constant_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP 
[[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) 
; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-MESA-LABEL: name: test_load_constant_s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: 
[[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-MESA-LABEL: name: test_load_constant_s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 
2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 
= COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 4) @@ -868,7 +963,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_constant_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -920,7 +1024,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -972,7 +1085,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-MESA-LABEL: name: test_load_constant_s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1032,7 +1154,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-MESA-LABEL: name: test_load_constant_s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1084,7 +1215,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 4) @@ -1193,132 +1333,202 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) 
= G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_constant_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + 
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; 
GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = 
G_OR [[AND4]], [[SHL2]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-MESA-LABEL: name: test_load_constant_s96_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; 
CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-MESA-LABEL: name: test_load_constant_s96_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 4) @@ -1417,7 +1627,20 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_constant_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1493,7 +1716,20 @@ body: | ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], 
[[C12]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1569,7 +1805,20 @@ body: | ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; 
GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-MESA-LABEL: name: test_load_constant_s96_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1657,7 +1906,20 @@ body: | ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-MESA-LABEL: name: test_load_constant_s96_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1733,7 +1995,20 @@ body: | ; 
GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 4) @@ -2007,7 +2282,24 @@ body: | ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; CI: 
[[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_constant_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2107,7 +2399,24 @@ body: | ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; 
VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_constant_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2207,7 +2516,24 @@ body: | ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], 
[[SHL11]] + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-MESA-LABEL: name: test_load_constant_s128_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2323,7 +2649,24 @@ body: | ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-MESA-LABEL: name: test_load_constant_s128_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY 
$vgpr0_vgpr1 @@ -2423,7 +2766,24 @@ body: | ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 4) @@ -2587,7 +2947,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p1) 
= G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_constant_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2639,7 +3008,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_constant_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2691,7 +3069,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = 
G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-MESA-LABEL: name: test_load_constant_p1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2751,7 +3138,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-MESA-LABEL: name: test_load_constant_p1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -2803,7 
+3199,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 4) @@ -2912,92 +3317,142 @@ body: | ; CI-LABEL: name: test_load_constant_p4_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_constant_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 
4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; 
GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-MESA-LABEL: name: test_load_constant_p4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: 
[[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-MESA-LABEL: name: test_load_constant_p4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) 
= G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 4) @@ -3068,7 +3523,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_constant_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -3120,7 +3584,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -3172,7 +3645,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; 
GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-MESA-LABEL: name: test_load_constant_p4_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -3232,7 +3714,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-MESA-LABEL: name: test_load_constant_p4_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -3284,7 +3775,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: 
[[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 4) @@ -3331,53 +3831,83 @@ body: | ; CI-LABEL: name: test_load_constant_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI: 
$vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_constant_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_constant_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-MESA-LABEL: name: test_load_constant_p5_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-MESA-LABEL: name: test_load_constant_p5_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, 
addrspace 4) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -3419,8 +3949,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_constant_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -3447,8 +3982,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: 
[[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_constant_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -3475,8 +4015,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-MESA-LABEL: name: test_load_constant_p5_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -3507,8 +4052,13 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-MESA-LABEL: name: test_load_constant_p5_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) @@ -3535,8 +4085,13 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 4) $vgpr0 = COPY %1 @@ -6390,166 +6945,256 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C1]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP5]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C2]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C1]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C2]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: 
[[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C1]](s64) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C2]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_constant_v2s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 
4) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-MESA: 
[[GEP5:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C1]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C2]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) - ; 
GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9-MESA: 
[[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 4) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C1]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C2]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) 
= COPY $vgpr0_vgpr1 @@ -6621,9 +7266,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -6644,34 +7298,42 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align1 @@ -6724,9 +7386,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; VI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; VI: 
[[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -6746,27 +7417,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: 
[[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align1 @@ -6819,9 +7498,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) 
:: (load 1, addrspace 4) ; GFX9: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -6841,27 +7529,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_constant_v2s64_align1 @@ -6922,9 +7618,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] 
+ ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -6945,34 +7650,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: 
[[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_constant_v2s64_align1 @@ -7025,9 +7738,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7047,27 +7769,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; 
GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: 
[[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -7225,9 +7955,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7248,36 +7987,44 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY 
[[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = 
COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; CI: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -7298,34 +8045,42 @@ body: | ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], 
[[COPY16]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), 
[[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7380,9 +8135,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; VI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7402,29 +8166,37 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: 
[[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; VI: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -7444,27 +8216,35 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI: 
[[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7519,9 +8299,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7541,29 +8330,37 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -7583,27 +8380,35 @@ body: | ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9: 
[[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7666,9 +8471,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7689,36 +8503,44 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = 
G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -7739,34 +8561,42 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: 
[[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; 
CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7821,9 +8651,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: 
[[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -7843,29 +8682,37 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: 
[[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -7885,27 +8732,35 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: 
[[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: 
[[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -8043,9 +8898,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -8066,36 +8930,44 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) 
; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND 
[[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; CI: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -8116,36 +8988,44 @@ body: | ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; CI: 
[[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CI: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C13]](s64) ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p4) :: (load 1, addrspace 4) ; CI: [[GEP24:%[0-9]+]]:_(p4) = G_GEP [[GEP23]], [[C]](s64) ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p4) :: (load 1, addrspace 4) @@ -8166,34 +9046,42 @@ body: | ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; CI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; CI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; CI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; CI: 
[[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] ; CI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; CI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; CI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; CI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; CI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; CI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; CI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; CI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; CI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; CI: 
[[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; CI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; CI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) + ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v4s64_align1 @@ -8246,9 +9134,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; VI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 
1, addrspace 4) @@ -8268,29 +9165,37 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; VI: 
[[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; VI: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -8310,29 +9215,37 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: 
[[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; VI: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p4) :: (load 1, addrspace 4) ; VI: [[GEP24:%[0-9]+]]:_(p4) = G_GEP [[GEP23]], [[C]](s64) ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p4) :: (load 1, 
addrspace 4) @@ -8352,27 +9265,35 @@ body: | ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; VI: 
[[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v4s64_align1 @@ -8425,9 +9346,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], 
[[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -8447,29 +9377,37 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: 
[[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -8489,29 +9427,37 @@ body: | ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC19]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; GFX9: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; GFX9: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP24:%[0-9]+]]:_(p4) = G_GEP [[GEP23]], [[C]](s64) ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p4) :: (load 1, addrspace 4) @@ -8531,27 +9477,35 @@ body: | ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; GFX9: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; GFX9: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; GFX9: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; GFX9: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; GFX9: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; GFX9: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; GFX9: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; GFX9: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; GFX9: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; GFX9: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; GFX9: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; GFX9: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; GFX9: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; GFX9: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; GFX9: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; GFX9: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; GFX9: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], 
[[C7]] ; GFX9: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; GFX9: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; GFX9: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; GFX9: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_constant_v4s64_align1 @@ -8612,9 +9566,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -8635,36 +9598,44 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; 
CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -8685,36 +9656,44 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = 
G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-MESA: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CI-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CI-MESA: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C13]](s64) ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP24:%[0-9]+]]:_(p4) = G_GEP [[GEP23]], [[C]](s64) ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p4) :: (load 1, addrspace 4) @@ -8735,34 +9714,42 @@ body: | ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] 
; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], 
[[C10]](s32) + ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s64_align1 @@ -8815,9 +9802,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -8837,29 +9833,37 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] 
; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9-MESA: 
[[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p4) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p4) :: (load 1, addrspace 4) @@ -8879,29 +9883,37 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; 
GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9-MESA: 
[[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; GFX9-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C12]](s64) ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP24:%[0-9]+]]:_(p4) = G_GEP [[GEP23]], [[C]](s64) ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p4) :: (load 1, addrspace 4) @@ -8921,27 +9933,35 @@ body: | ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = 
G_OR [[AND28]], [[SHL14]] + ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -9137,9 +10157,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: 
[[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -9160,34 +10189,42 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: 
[[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) 
+ ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; VI-LABEL: name: test_load_constant_v2p1_align1 @@ -9240,9 +10277,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; VI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -9262,27 +10308,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; 
VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; GFX9-LABEL: name: test_load_constant_v2p1_align1 @@ -9335,9 +10389,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -9357,27 +10420,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: 
[[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_constant_v2p1_align1 @@ -9438,9 +10509,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; 
CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -9461,34 +10541,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: 
[[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_constant_v2p1_align1 @@ -9541,9 +10629,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 1, addrspace 4) @@ -9563,27 +10660,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: 
[[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 @@ -9689,9 +10794,14 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 4) ; CI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 4) @@ -9704,19 +10814,23 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY 
[[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_constant_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -9744,9 +10858,14 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP 
[[COPY]], [[C6]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 4) ; VI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 4) @@ -9758,16 +10877,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-LABEL: name: test_load_constant_v2p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -9795,9 +10918,14 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], 
[[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 4) ; GFX9: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 4) @@ -9809,16 +10937,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; CI-MESA-LABEL: name: test_load_constant_v2p3_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -9850,9 +10982,14 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 4) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 4) @@ -9865,19 +11002,23 @@ body: | ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = 
G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-MESA-LABEL: name: test_load_constant_v2p3_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -9905,9 +11046,14 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: 
[[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 4) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 4) @@ -9919,16 +11065,20 @@ body: | ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) @@ -10220,9 +11370,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 1) ; CI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 1) @@ -10235,19 +11389,22 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -10275,9 +11432,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; VI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 1) ; VI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 1) @@ -10289,16 +11450,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: 
$vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -10326,9 +11490,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 1) ; GFX9: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 1) @@ -10340,16 +11508,19 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] 
- ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -10381,9 +11552,13 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 1) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 1) @@ -10396,19 +11571,22 @@ body: | ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY 
[[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -10436,9 +11614,13 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = 
G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 1, addrspace 1) @@ -10450,16 +11632,19 @@ body: | ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[MV]](s32), [[MV1]](s32) + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) @@ -10475,97 +11660,137 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 
1) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[GEP1]], [[C]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[GEP1]], [[C]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[GEP1]], [[C]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = 
G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[GEP1]], [[C]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[GEP1]], [[C]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[COPY4]], [[C1]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) @@ -10842,9 +12067,22 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; CI: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C14]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; CI: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C15]](s64) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p4) :: (load 1, addrspace 1) ; CI: [[GEP12:%[0-9]+]]:_(p4) = G_GEP [[GEP11]], [[C]](s64) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p4) :: 
(load 1, addrspace 1) @@ -10873,50 +12111,62 @@ body: | ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C13]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C13]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C13]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: 
[[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C13]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C13]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C13]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + 
; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C14]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C14]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C14]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; CI: [[COPY24:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY24]](s96) @@ -10995,9 +12245,22 @@ body: | ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; VI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C13]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; VI: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C14]](s64) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p4) :: (load 1, addrspace 1) ; VI: [[GEP12:%[0-9]+]]:_(p4) = G_GEP [[GEP11]], [[C]](s64) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p4) :: (load 1, addrspace 1) @@ -11025,39 +12288,51 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C11]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C11]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C11]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C11]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], 
[[C12]](s16) + ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C11]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C11]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL13]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C11]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL14]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR12]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -11136,9 +12411,22 @@ body: | ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C13]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; GFX9: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C14]](s64) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p4) :: (load 1, addrspace 1) ; GFX9: [[GEP12:%[0-9]+]]:_(p4) = G_GEP [[GEP11]], [[C]](s64) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p4) :: (load 1, addrspace 1) @@ -11166,39 +12454,51 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C11]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C11]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C11]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C11]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) + ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC19]], [[C11]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C11]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL13]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C11]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) + ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL14]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; GFX9: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: 
[[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -11289,9 +12589,22 @@ body: | ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; CI-MESA: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C14]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; CI-MESA: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C15]](s64) ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p4) 
:: (load 1, addrspace 1) ; CI-MESA: [[GEP12:%[0-9]+]]:_(p4) = G_GEP [[GEP11]], [[C]](s64) ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p4) :: (load 1, addrspace 1) @@ -11320,50 +12633,62 @@ body: | ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C13]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C13]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C13]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR 
[[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C13]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C13]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C13]] - ; CI-MESA: 
[[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C14]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C14]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C14]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[COPY24:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; CI-MESA: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY24]](s96) @@ -11442,9 +12767,22 @@ body: | ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; GFX9-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - 
; GFX9-MESA: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C13]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; GFX9-MESA: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[GEP11:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C14]](s64) ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p4) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP12:%[0-9]+]]:_(p4) = G_GEP [[GEP11]], [[C]](s64) ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p4) :: (load 1, addrspace 1) @@ -11472,39 +12810,51 @@ body: | ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C11]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C11]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], 
[[C11]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C11]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C11]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C11]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C11]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) + ; GFX9-MESA: 
[[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL13]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C11]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL14]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -11526,248 +12876,378 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) 
:: (load 2, addrspace 1) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; 
CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C1]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C2]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; CI: [[GEP9:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C3]](s64) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[GEP10:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C4]](s64) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p4) :: (load 2, addrspace 1) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD11]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; CI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C1]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C2]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[GEP9:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C3]](s64) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[GEP10:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C4]](s64) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p4) :: (load 2, addrspace 1) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), 
[[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) 
+ ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C1]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C2]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[GEP9:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C3]](s64) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[GEP10:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C4]](s64) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p4) :: (load 2, addrspace 1) - ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), 
[[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; CI-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: 
[[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], 
[[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; CI-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C1]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C2]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; CI-MESA: [[GEP9:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C3]](s64) ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[GEP10:%[0-9]+]]:_(p4) 
= G_GEP [[GEP5]], [[C4]](s64) ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p4) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI-MESA: $vgpr3_vgpr4_vgpr5 = 
COPY [[COPY14]](s96) ; GFX9-MESA-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C7]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C1]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) 
; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C2]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[GEP9:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C3]](s64) ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[GEP10:%[0-9]+]]:_(p4) = G_GEP [[GEP5]], [[C4]](s64) ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p4) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = 
COPY [[LOAD11]](s32) + ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index bb8bf7c84d751..5fb5ad65673a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -383,53 +383,78 @@ body: | ; CI-LABEL: name: test_load_flat_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], 
[[SHL]] + ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_flat_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_flat_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: $vgpr0 = COPY [[OR]](s32) ; CI-MESA-LABEL: name: test_load_flat_s32_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: $vgpr0 = COPY [[OR]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_s32_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -471,8 +496,12 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_flat_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -499,8 +528,12 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_flat_s32_align1 ; GFX9: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -527,8 +560,12 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: $vgpr0 = COPY [[OR2]](s32) ; CI-MESA-LABEL: name: test_load_flat_s32_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -559,8 +596,12 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_s32_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -587,8 +628,12 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], 
[[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -712,92 +757,142 @@ body: | ; CI-LABEL: name: test_load_flat_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_flat_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-MESA-LABEL: name: test_load_flat_s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-MESA-LABEL: name: test_load_flat_s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 0) @@ -868,7 +963,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_flat_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -920,7 +1024,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -972,7 +1085,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-MESA-LABEL: name: test_load_flat_s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1032,7 +1154,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-MESA-LABEL: name: test_load_flat_s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1084,7 +1215,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) 
%0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 0) @@ -1193,132 +1333,202 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C4]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C6]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_flat_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; VI: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP 
[[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C4]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C6]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-MESA-LABEL: name: test_load_flat_s96_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; CI-MESA: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-MESA-LABEL: name: test_load_flat_s96_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: 
(load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[COPY4]], [[C5]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 0) @@ -1417,7 +1627,20 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: 
name: test_load_flat_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1493,7 +1716,20 @@ body: | ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1569,7 +1805,20 @@ body: | ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9: 
[[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-MESA-LABEL: name: test_load_flat_s96_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1657,7 +1906,20 @@ body: | ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES 
[[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-MESA-LABEL: name: test_load_flat_s96_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1733,7 +1995,20 @@ body: | ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 0) @@ -2057,7 +2332,24 @@ body: | ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + 
; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_flat_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2157,7 +2449,24 @@ body: | ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: 
[[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_flat_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2257,7 +2566,24 @@ body: | ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: 
[[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-MESA-LABEL: name: test_load_flat_s128_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2373,7 +2699,24 @@ body: | ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = 
G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-MESA-LABEL: name: test_load_flat_s128_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2473,7 +2816,24 @@ body: | ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 0) @@ -2657,7 
+3017,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_flat_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2709,7 +3078,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_flat_p1_align1 ; GFX9: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2761,7 +3139,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-MESA-LABEL: name: test_load_flat_p1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2821,7 +3208,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-MESA-LABEL: name: test_load_flat_p1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -2873,7 +3269,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 0) @@ -2982,92 +3387,142 @@ body: | ; CI-LABEL: name: test_load_flat_p4_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_flat_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) 
; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-MESA-LABEL: name: test_load_flat_p4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP 
[[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-MESA-LABEL: name: test_load_flat_p4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 0) @@ -3138,7 +3593,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], 
[[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_flat_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -3190,7 +3654,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -3242,7 +3715,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + 
; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-MESA-LABEL: name: test_load_flat_p4_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -3302,7 +3784,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-MESA-LABEL: name: test_load_flat_p4_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -3354,7 +3845,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), 
[[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 0) @@ -3401,53 +3901,83 @@ body: | ; CI-LABEL: name: test_load_flat_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_flat_p5_align2 ; VI: 
[[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_flat_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-MESA-LABEL: name: test_load_flat_p5_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-MESA-LABEL: name: test_load_flat_p5_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; 
GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -3489,8 +4019,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_flat_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -3517,8 +4052,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_flat_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -3545,8 +4085,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-MESA-LABEL: name: test_load_flat_p5_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -3577,8 +4122,13 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: 
$vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-MESA-LABEL: name: test_load_flat_p5_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) @@ -3605,8 +4155,13 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 0) $vgpr0 = COPY %1 @@ -6480,166 +7035,256 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C1]](s64) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p0) :: (load 2) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C2]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; 
CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: 
[[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C1]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p0) :: (load 2) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C2]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = 
COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C1]](s64) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p0) :: (load 2) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C2]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[C4]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_flat_v2s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-MESA: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C1]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p0) :: (load 2) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C2]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; 
GFX9-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C1]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C2]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), 
[[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 @@ -6711,9 +7356,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -6734,34 +7388,42 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align1 @@ -6814,9 +7476,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) 
= G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; VI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -6836,27 +7507,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: 
[[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align1 @@ -6909,9 +7588,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], 
[[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -6931,27 +7619,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = 
G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_flat_v2s64_align1 @@ -7012,9 +7708,18 @@ body: | ; CI-MESA: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7035,34 +7740,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), 
[[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_flat_v2s64_align1 @@ -7115,9 +7828,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7137,27 +7859,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + 
; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 @@ -7385,9 +8115,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7408,36 +8147,44 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: 
[[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: 
[[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C12]](s64) ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; CI: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -7458,34 +8205,42 @@ body: | ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL 
[[AND21]], [[COPY20]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7540,9 +8295,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: 
[[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; VI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7562,29 +8326,37 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], 
[[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = 
G_LOAD [[GEP15]](p0) :: (load 1) ; VI: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -7604,27 +8376,35 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7679,9 +8459,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), 
[[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7701,29 +8490,37 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; GFX9: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -7743,27 +8540,35 @@ body: | ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) 
= G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR16]](s32), [[OR17]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7826,9 +8631,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -7849,36 +8663,44 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) 
= G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND 
[[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -7899,34 +8721,42 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; 
CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], 
[[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7981,9 +8811,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -8003,29 +8842,37 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; 
GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: 
[[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -8045,27 +8892,35 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: 
[[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -8243,9 +9098,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -8266,37 +9130,45 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) + ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C12]](s64) ; CI: 
[[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; CI: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -8317,35 +9189,43 @@ body: | ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL14]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; CI: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C10]](s64) + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CI: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C11]](s64) ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p0) :: (load 1) ; CI: [[GEP24:%[0-9]+]]:_(p0) = G_GEP [[GEP23]], [[C]](s64) ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p0) :: (load 1) @@ -8366,34 +9246,42 @@ body: | ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; CI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; 
CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; CI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; CI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; CI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; CI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; CI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; CI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; CI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; CI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] ; CI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; CI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; CI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; CI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; CI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; CI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; CI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; CI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; CI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; CI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI: 
[[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; CI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; CI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; CI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; CI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; CI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; CI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; CI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; CI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; CI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) + ; CI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) @@ -8447,9 +9335,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) 
= G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; VI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -8469,30 +9366,38 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: 
[[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; VI: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -8512,28 +9417,36 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL 
[[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; VI: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C9]](s64) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; 
VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; VI: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C10]](s64) ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p0) :: (load 1) ; VI: [[GEP24:%[0-9]+]]:_(p0) = G_GEP [[GEP23]], [[C]](s64) ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p0) :: (load 1) @@ -8553,27 +9466,35 @@ body: | ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; 
VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; VI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; VI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) @@ -8627,9 +9548,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -8649,30 +9579,38 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - 
; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; GFX9: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -8692,28 +9630,36 @@ body: | ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; GFX9: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C9]](s64) + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], 
[[SHL15]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; GFX9: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C10]](s64) ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p0) :: (load 1) ; GFX9: [[GEP24:%[0-9]+]]:_(p0) = G_GEP [[GEP23]], [[C]](s64) ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p0) :: (load 1) @@ -8733,27 +9679,35 @@ body: | ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; GFX9: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; GFX9: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; GFX9: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; GFX9: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; GFX9: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; GFX9: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; GFX9: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; GFX9: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; GFX9: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; GFX9: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; GFX9: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; GFX9: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD29]](s32) ; GFX9: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; GFX9: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; GFX9: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; GFX9: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; GFX9: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; GFX9: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; GFX9: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; GFX9: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; GFX9: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; GFX9: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; GFX9: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; GFX9: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; GFX9: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) @@ -8815,9 +9769,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -8838,37 +9801,45 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: 
[[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: 
[[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -8889,35 +9860,43 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], 
[[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; CI-MESA: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], 
[[C10]](s64) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CI-MESA: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C11]](s64) ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p0) :: (load 1) ; CI-MESA: [[GEP24:%[0-9]+]]:_(p0) = G_GEP [[GEP23]], [[C]](s64) ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p0) :: (load 1) @@ -8938,34 +9917,42 @@ body: | ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = 
G_AND [[COPY27]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] ; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; CI-MESA: 
[[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) + ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) @@ -9019,9 +10006,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9041,30 +10037,38 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: 
[[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p0) :: (load 1) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p0) :: (load 1) @@ -9084,28 +10088,36 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: 
[[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C9]](s64) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; 
GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p0) = G_GEP [[GEP15]], [[C10]](s64) ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p0) :: (load 1) ; GFX9-MESA: [[GEP24:%[0-9]+]]:_(p0) = G_GEP [[GEP23]], [[C]](s64) ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p0) :: (load 1) @@ -9125,27 +10137,35 @@ body: | ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR 
[[AND26]], [[SHL19]] ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) @@ -9362,9 +10382,18 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9385,34 +10414,42 @@ body: | ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: 
[[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], 
[[COPY14]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; VI-LABEL: name: test_load_flat_v2p1_align1 @@ -9465,9 +10502,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; VI: 
[[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; VI: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9487,27 +10533,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), 
[[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; GFX9-LABEL: name: test_load_flat_v2p1_align1 @@ -9560,9 +10614,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9: 
[[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9582,27 +10645,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], 
[[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_flat_v2p1_align1 @@ -9663,9 +10734,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], 
[[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9686,34 +10766,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR 
[[MV]](p1), [[MV1]](p1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_flat_v2p1_align1 @@ -9766,9 +10854,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p0) :: (load 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p0) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p0) :: (load 1) @@ -9788,27 +10885,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL 
[[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; 
GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 @@ -9914,9 +11019,14 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C7]](s64) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 1) ; CI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 1) @@ -9929,19 +11039,23 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + 
; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_flat_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -9969,9 +11083,14 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 1) ; VI: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 1) @@ -9983,16 +11102,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), 
[[INTTOPTR1]](p3) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-LABEL: name: test_load_flat_v2p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -10020,9 +11143,14 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 1) ; GFX9: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 1) @@ -10034,16 +11162,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; CI-MESA-LABEL: name: test_load_flat_v2p3_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -10075,9 +11207,14 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 1) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: 
(load 1) @@ -10090,19 +11227,23 @@ body: | ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-MESA-LABEL: name: 
test_load_flat_v2p3_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -10130,9 +11271,14 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 1) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 1) @@ -10144,16 +11290,20 @@ body: | ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-MESA: 
[[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index e35914caa725e..9e7d3338d7867 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -446,13 +446,18 @@ body: | ; SI-LABEL: name: test_load_global_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-HSA-LABEL: name: test_load_global_s32_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -460,23 +465,33 @@ body: | ; CI-MESA-LABEL: name: test_load_global_s32_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_global_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s32_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -484,13 +499,18 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_s32_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 1) $vgpr0 = COPY %1 @@ -532,8 +552,12 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-HSA-LABEL: name: test_load_global_s32_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -568,8 +592,12 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](s32) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_global_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -596,8 +624,12 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s32_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -628,8 +660,12 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](s32) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 1) $vgpr0 = COPY %1 @@ -775,20 +811,30 @@ body: | ; SI-LABEL: name: test_load_global_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = 
G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -797,38 +843,58 @@ body: | ; CI-MESA-LABEL: name: test_load_global_s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_global_s64_align2 ; VI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) 
= COPY $vgpr0_vgpr1 @@ -837,20 +903,30 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; 
GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 1) @@ -921,7 +997,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -985,7 +1070,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + 
; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_global_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1037,7 +1131,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1093,7 +1196,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 1) @@ -1214,28 +1326,42 @@ body: | ; SI-LABEL: name: test_load_global_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; SI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP4]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1244,54 +1370,82 @@ body: | ; CI-MESA-LABEL: name: test_load_global_s96_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], 
[[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_global_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 
; VI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1300,28 +1454,42 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_s96_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 1) @@ -1420,7 +1588,20 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1512,7 +1693,20 @@ body: | ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_global_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1588,7 +1782,20 @@ body: | ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s96) = 
G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1668,7 +1875,20 @@ body: | ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT2]], [[SHL7]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 1) @@ -1971,7 +2191,24 @@ body: | ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; SI: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-HSA-LABEL: name: test_load_global_s128_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2091,7 +2328,24 @@ body: | ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C16]](s32) ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C18]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C18]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C18]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C18]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_global_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2191,7 +2445,24 @@ body: | ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL 
[[AND15]], [[C16]](s16) ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-HSA-LABEL: name: test_load_global_s128_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2295,7 +2566,24 @@ body: | ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]] ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16) ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR1]](s16) + ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 1) @@ -2471,7 +2759,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], 
[[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-HSA-LABEL: name: test_load_global_p1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2535,7 +2832,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_global_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2587,7 +2893,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-HSA-LABEL: name: test_load_global_p1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2643,7 +2958,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 1) @@ -2768,20 +3092,30 @@ body: | ; SI-LABEL: name: test_load_global_p4_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2790,38 +3124,58 @@ body: | ; CI-MESA-LABEL: name: test_load_global_p4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP 
[[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_global_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2830,20 +3184,30 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_p4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; 
GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = 
G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 1) @@ -2914,7 +3278,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -2978,7 +3351,16 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_global_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -3030,7 +3412,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -3086,7 +3477,16 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: 
[[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 1) @@ -3137,13 +3537,19 @@ body: | ; SI-LABEL: name: test_load_global_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-HSA-LABEL: name: test_load_global_p5_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -3151,23 +3557,35 @@ body: | ; CI-MESA-LABEL: name: test_load_global_p5_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, 
addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_global_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], 
[[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-HSA-LABEL: name: test_load_global_p5_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) @@ -3175,13 +3593,19 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_p5_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 1) $vgpr0 = COPY %1 @@ -3223,8 +3647,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p5) = 
G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-HSA-LABEL: name: test_load_global_p5_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -3259,8 +3688,13 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: $vgpr0 = COPY [[MV]](p5) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_global_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -3287,8 +3721,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: 
[[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-HSA-LABEL: name: test_load_global_p5_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) @@ -3319,8 +3758,13 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: $vgpr0 = COPY [[MV]](p5) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 1) $vgpr0 = COPY %1 @@ -6434,34 +6878,52 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 
2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; SI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C1]](s64) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; SI: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C2]](s64) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align2 @@ -6471,67 +6933,103 @@ body: | ; CI-MESA-LABEL: name: test_load_global_v2s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; 
CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; 
CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C1]](s64) ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C2]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, 
addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) 
:: (load 2, addrspace 1) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C1]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C2]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2 @@ -6541,34 +7039,52 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], 
[[C5]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C1]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C2]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -6640,9 
+7156,18 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; SI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -6663,34 +7188,42 @@ body: | ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: 
[[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align1 @@ -6755,9 +7288,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; 
CI-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -6778,34 +7320,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align1 @@ -6858,9 +7408,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR 
[[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; VI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -6880,27 +7439,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) 
+ ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1 @@ -6957,9 +7524,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: 
[[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -6979,27 +7555,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: 
[[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -7169,9 +7753,18 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; SI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -7192,36 +7785,44 @@ body: | ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: 
[[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; SI: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -7242,34 +7843,42 @@ body: | ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; SI: 
[[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; SI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR 
[[AND22]], [[TRUNC23]] + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7338,9 +7947,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = 
G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -7361,36 +7979,44 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP15]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -7411,34 +8037,42 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: 
[[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7493,9 +8127,18 @@ body: | ; VI: 
[[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; VI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -7515,29 +8158,37 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = 
G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: 
[[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; VI: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -7557,27 +8208,35 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; 
VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7638,9 +8297,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -7660,29 +8328,37 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: 
[[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -7702,27 +8378,35 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: 
[[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9-MESA: 
[[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 @@ -7868,9 +8552,18 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), 
[[OR5]](s32) + ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; SI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -7891,36 +8584,44 @@ body: | ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR 
[[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; SI: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -7941,36 +8642,44 @@ body: | ; 
SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; SI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; SI: 
[[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; SI: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) + ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; SI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; SI: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C13]](s64) ; SI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p1) :: (load 1, addrspace 1) ; SI: [[GEP24:%[0-9]+]]:_(p1) = G_GEP [[GEP23]], [[C]](s64) ; SI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p1) :: (load 1, addrspace 1) @@ -7991,34 +8700,42 @@ body: | ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; SI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; SI: 
[[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; SI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; SI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; SI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; SI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; SI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; SI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; SI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] ; SI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; SI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; SI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; SI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; SI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; SI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; SI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; SI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; SI: 
[[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; SI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; SI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; SI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; SI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; SI: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) + ; SI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v4s64_align1 @@ -8083,9 +8800,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -8106,36 +8832,44 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: 
[[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -8156,36 +8890,44 @@ body: | ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C9]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; 
CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C9]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CI-MESA: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; 
CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CI-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CI-MESA: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C13]](s64) ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP24:%[0-9]+]]:_(p1) = G_GEP [[GEP23]], [[C]](s64) ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p1) :: (load 1, addrspace 1) @@ -8206,34 +8948,42 @@ body: | ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD25]](s32) ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C9]] - ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) - ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] + ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY24]](s32) + ; CI-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CI-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[TRUNC25]] ; CI-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; CI-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD27]](s32) ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C9]] - ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) - ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] + ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY26]](s32) + ; CI-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CI-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[TRUNC27]] ; CI-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; CI-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; CI-MESA: 
[[COPY28:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LOAD29]](s32) ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) - ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] + ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY28]](s32) + ; CI-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CI-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[TRUNC29]] ; CI-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; CI-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LOAD31]](s32) ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) - ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CI-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] - ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY30]](s32) + ; CI-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CI-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[TRUNC31]] + ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; CI-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C10]](s32) + ; CI-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; CI-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; CI-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; CI-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C10]](s32) + ; CI-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; CI-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), 
[[OR23]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v4s64_align1 @@ -8286,9 +9036,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; VI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -8308,29 +9067,37 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: 
[[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: 
[[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; VI: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -8350,29 +9117,37 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], 
[[SHL14]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; VI: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; VI: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p1) :: (load 1, addrspace 1) ; VI: [[GEP24:%[0-9]+]]:_(p1) = G_GEP [[GEP23]], [[C]](s64) ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p1) :: (load 1, addrspace 1) @@ -8392,27 +9167,35 @@ body: | ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; VI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; VI: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + 
; VI: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; VI: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; VI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; VI: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; VI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; VI: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; VI: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; VI: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; VI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; VI: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; VI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; VI: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; VI: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; VI: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; VI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; VI: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; VI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; VI: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; VI: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; VI: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; VI: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; VI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; VI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; VI: 
[[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; VI: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1 @@ -8469,9 +9252,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -8491,29 +9283,37 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) - ; 
GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-MESA: [[GEP15:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP16:%[0-9]+]]:_(p1) = G_GEP [[GEP15]], [[C]](s64) ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p1) :: (load 1, addrspace 1) @@ -8533,29 +9333,37 @@ body: | ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], 
[[C7]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) - ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; 
GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; GFX9-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; GFX9-MESA: [[GEP23:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C12]](s64) ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[GEP23]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP24:%[0-9]+]]:_(p1) = G_GEP [[GEP23]], [[C]](s64) ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[GEP24]](p1) :: (load 1, addrspace 1) @@ -8575,27 +9383,35 @@ body: | ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) ; GFX9-MESA: [[AND25:%[0-9]+]]:_(s16) = G_AND [[TRUNC25]], [[C7]] - ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) - ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL12]] + ; GFX9-MESA: [[SHL18:%[0-9]+]]:_(s16) = G_SHL [[AND25]], [[C8]](s16) + ; GFX9-MESA: [[OR18:%[0-9]+]]:_(s16) = G_OR [[AND24]], [[SHL18]] ; GFX9-MESA: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD26]](s32) ; GFX9-MESA: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C7]] ; GFX9-MESA: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD27]](s32) ; GFX9-MESA: [[AND27:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C7]] - ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) - ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL13]] + ; GFX9-MESA: [[SHL19:%[0-9]+]]:_(s16) = G_SHL [[AND27]], [[C8]](s16) + ; GFX9-MESA: [[OR19:%[0-9]+]]:_(s16) = G_OR [[AND26]], [[SHL19]] ; GFX9-MESA: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD28]](s32) ; GFX9-MESA: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C7]] ; GFX9-MESA: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD29]](s32) ; GFX9-MESA: [[AND29:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C7]] - ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) - ; GFX9-MESA: 
[[OR14:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL14]] + ; GFX9-MESA: [[SHL20:%[0-9]+]]:_(s16) = G_SHL [[AND29]], [[C8]](s16) + ; GFX9-MESA: [[OR20:%[0-9]+]]:_(s16) = G_OR [[AND28]], [[SHL20]] ; GFX9-MESA: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD30]](s32) ; GFX9-MESA: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC30]], [[C7]] ; GFX9-MESA: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD31]](s32) ; GFX9-MESA: [[AND31:%[0-9]+]]:_(s16) = G_AND [[TRUNC31]], [[C7]] - ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) - ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL15]] - ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s16), [[OR13]](s16), [[OR14]](s16), [[OR15]](s16) + ; GFX9-MESA: [[SHL21:%[0-9]+]]:_(s16) = G_SHL [[AND31]], [[C8]](s16) + ; GFX9-MESA: [[OR21:%[0-9]+]]:_(s16) = G_OR [[AND30]], [[SHL21]] + ; GFX9-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR18]](s16) + ; GFX9-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR19]](s16) + ; GFX9-MESA: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C9]](s32) + ; GFX9-MESA: [[OR22:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL22]] + ; GFX9-MESA: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR20]](s16) + ; GFX9-MESA: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[OR21]](s16) + ; GFX9-MESA: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[ZEXT15]], [[C9]](s32) + ; GFX9-MESA: [[OR23:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL23]] + ; GFX9-MESA: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR22]](s32), [[OR23]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -8807,9 +9623,18 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), 
[[OR2]](s16), [[OR3]](s16) - ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; SI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -8830,34 +9655,42 @@ body: | ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) 
= G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2p1_align1 @@ -8922,9 +9755,18 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C11]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -8945,34 +9787,42 @@ body: | ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) 
; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]] - ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY8]](s32) + ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]] - ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) + ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], 
[[C7]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C9]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2p1_align1 @@ -9025,9 +9875,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C9]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; VI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -9047,27 +9906,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: 
[[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1 @@ -9124,9 +9991,18 @@ body: | ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP 
[[COPY]], [[C9]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C10]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP7]], [[C]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 1, addrspace 1) @@ -9146,27 +10022,35 @@ body: | ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; 
GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -9315,9 +10199,14 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; SI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9330,19 +10219,23 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) 
- ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; CI-HSA-LABEL: name: test_load_global_v2p3_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -9378,9 +10271,14 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; 
CI-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9393,19 +10291,23 @@ body: | ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR 
[[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_global_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -9433,9 +10335,14 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; VI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9447,16 +10354,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -9488,9 +10399,14 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: 
[[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9502,16 +10418,20 @@ body: | ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[MV]](p3), [[MV1]](p3) + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 1) @@ -9839,9 +10759,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: 
[[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; SI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9854,19 +10778,22 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), 
[[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_4_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -9906,9 +10833,13 @@ body: | ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9921,19 +10852,22 @@ body: | ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) 
= G_SHL [[AND5]], [[COPY4]](s32) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_global_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -9961,9 +10895,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), 
[[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; VI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -9975,16 +10913,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: 
[[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_4_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -10020,9 +10961,13 @@ body: | ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[GEP3]], [[C]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 1, addrspace 1) @@ -10034,16 +10979,19 @@ body: | ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: 
[[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) @@ -10059,21 +11007,29 @@ body: | ; SI-LABEL: name: test_extload_global_v2s32_from_4_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + 
; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[GEP1]], [[C]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_4_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -10086,40 +11042,56 @@ body: | ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[GEP1]], [[C]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_global_v2s32_from_4_align2 ; VI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[GEP1]], [[C]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_4_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -10132,21 +11104,29 @@ body: | ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[GEP1]], [[C]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) @@ -10476,9 +11456,22 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; SI: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C14]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], 
[[C14]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; SI: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; SI: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C15]](s64) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p1) :: (load 1, addrspace 1) ; SI: [[GEP12:%[0-9]+]]:_(p1) = G_GEP [[GEP11]], [[C]](s64) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p1) :: (load 1, addrspace 1) @@ -10507,50 +11500,62 @@ body: | ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C13]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C13]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND 
[[COPY17]], [[C13]] - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C13]] - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; SI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C13]] - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND 
[[COPY23]], [[C13]] - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; SI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C14]](s32) + ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C14]](s32) + ; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C14]](s32) + ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; SI: [[COPY24:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY24]](s96) @@ -10648,9 +11653,22 @@ body: | ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C12]](s32) ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; CI-MESA: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C14]](s64) + ; CI-MESA: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CI-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CI-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; CI-MESA: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C15]](s64) ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p1) :: (load 1, addrspace 1) ; CI-MESA: [[GEP12:%[0-9]+]]:_(p1) = G_GEP [[GEP11]], [[C]](s64) ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p1) :: (load 1, addrspace 1) @@ -10679,50 +11697,62 @@ body: | ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-MESA: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C13]] - ; CI-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) - ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY12]](s32) + ; CI-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; 
CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-MESA: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C13]] - ; CI-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) - ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY14]](s32) + ; CI-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-MESA: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C13]] - ; CI-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) - ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY16]](s32) + ; CI-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C13]] - ; CI-MESA: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) - ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY18]](s32) + ; CI-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR 
[[AND18]], [[TRUNC19]] ; CI-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C13]] - ; CI-MESA: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) - ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY20]](s32) + ; CI-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C13]] - ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) - ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY22]](s32) + ; CI-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C14]](s32) + ; CI-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-MESA: 
[[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C14]](s32) + ; CI-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C14]](s32) + ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[COPY24:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; CI-MESA: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY24]](s96) @@ -10801,9 +11831,22 @@ body: | ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; VI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C13]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; VI: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; VI: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: 
[[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C14]](s64) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p1) :: (load 1, addrspace 1) ; VI: [[GEP12:%[0-9]+]]:_(p1) = G_GEP [[GEP11]], [[C]](s64) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p1) :: (load 1, addrspace 1) @@ -10831,39 +11874,51 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C11]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C11]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C11]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C11]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) + ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C11]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], 
[[C12]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] ; VI: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C11]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL13]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C11]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) + ; VI: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL14]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT10]], [[SHL17]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -10949,9 +12004,22 @@ body: | ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C11]] ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C12]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; GFX9-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C13]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; GFX9-MESA: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9-MESA: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; GFX9-MESA: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9-MESA: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; GFX9-MESA: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[GEP11:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C14]](s64) ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p1) :: (load 1, addrspace 1) ; GFX9-MESA: [[GEP12:%[0-9]+]]:_(p1) = G_GEP [[GEP11]], [[C]](s64) ; GFX9-MESA: 
[[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p1) :: (load 1, addrspace 1) @@ -10979,39 +12047,51 @@ body: | ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C11]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9-MESA: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C11]] - ; GFX9-MESA: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) - ; GFX9-MESA: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C12]](s16) + ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9-MESA: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9-MESA: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; GFX9-MESA: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9-MESA: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C11]] - ; GFX9-MESA: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) - ; GFX9-MESA: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C12]](s16) + ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9-MESA: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C11]] - ; GFX9-MESA: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) - ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C12]](s16) + ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; GFX9-MESA: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9-MESA: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; GFX9-MESA: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9-MESA: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C11]] - ; GFX9-MESA: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) 
- ; GFX9-MESA: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] + ; GFX9-MESA: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C12]](s16) + ; GFX9-MESA: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] ; GFX9-MESA: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; GFX9-MESA: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9-MESA: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C11]] - ; GFX9-MESA: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) - ; GFX9-MESA: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9-MESA: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C12]](s16) + ; GFX9-MESA: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL13]] ; GFX9-MESA: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9-MESA: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; GFX9-MESA: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9-MESA: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C11]] - ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) - ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C12]](s16) + ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL14]] + ; GFX9-MESA: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9-MESA: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; GFX9-MESA: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; GFX9-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; GFX9-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9-MESA: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; GFX9-MESA: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; GFX9-MESA: 
[[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; GFX9-MESA: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; GFX9-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -11033,52 +12113,78 @@ body: | ; SI-LABEL: name: test_extload_global_v2s96_from_24_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; SI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD5]](s32) - ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; SI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; SI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C]](s64) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C1]](s64) ; SI: 
[[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C2]](s64) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; SI: [[GEP9:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C3]](s64) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[GEP10:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C4]](s64) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p1) :: (load 2, addrspace 1) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], 
[[C5]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s96>) = G_LOAD [[COPY]](p1) :: (load 24, align 2, addrspace 1) @@ -11089,101 +12195,153 @@ body: | ; CI-MESA-LABEL: name: test_extload_global_v2s96_from_24_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: 
[[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CI-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CI-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CI-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; CI-MESA: 
[[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; CI-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C1]](s64) ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C2]](s64) ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; CI-MESA: [[GEP9:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C3]](s64) ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[GEP10:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C4]](s64) ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY 
[[LOAD8]](s32) + ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; CI-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CI-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; CI-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; VI-LABEL: name: test_extload_global_v2s96_from_24_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD3]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; VI: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; VI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; VI: 
[[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; VI: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C1]](s64) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C2]](s64) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[GEP9:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C3]](s64) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[GEP10:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C4]](s64) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p1) :: (load 2, addrspace 1) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; VI: 
[[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s96>) = G_LOAD [[COPY]](p1) :: (load 24, align 2, addrspace 1) @@ -11194,52 +12352,78 @@ body: | ; GFX9-MESA-LABEL: name: test_extload_global_v2s96_from_24_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; 
GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C3]](s64) ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; GFX9-MESA: [[GEP4:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C4]](s64) ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C5]](s64) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9-MESA: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9-MESA: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX9-MESA: [[GEP5:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C7]](s64) ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9-MESA: [[GEP7:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C1]](s64) ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[GEP8:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C2]](s64) ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9-MESA: [[GEP9:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C3]](s64) ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[GEP10:%[0-9]+]]:_(p1) = G_GEP [[GEP5]], [[C4]](s64) ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; 
GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9-MESA: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9-MESA: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; GFX9-MESA: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 
69a568cce314e..d5b869a560e91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -385,53 +385,78 @@ body: | ; SI-LABEL: name: test_load_local_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-LABEL: name: test_load_local_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: $vgpr0 = COPY [[OR]](s32) ; CI-DS128-LABEL: name: test_load_local_s32_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_local_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), 
[[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_local_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: $vgpr0 = COPY [[OR]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 3) $vgpr0 = COPY %1 @@ -473,8 +498,12 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 
= COPY [[MV]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-LABEL: name: test_load_local_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -505,8 +534,12 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: $vgpr0 = COPY [[OR2]](s32) ; CI-DS128-LABEL: name: test_load_local_s32_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -537,8 +570,12 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: $vgpr0 = COPY [[OR2]](s32) ; 
VI-LABEL: name: test_load_local_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -565,8 +602,12 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_local_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -593,8 +634,12 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: $vgpr0 = COPY [[OR2]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 3) $vgpr0 = COPY %1 @@ -708,92 +753,142 @@ body: | ; SI-LABEL: name: test_load_local_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: 
[[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_local_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], 
[[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], 
[[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_local_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_local_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 3) @@ -864,7 +959,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_local_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -924,7 +1028,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -984,7 +1097,16 @@ body: | ; CI-DS128: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_local_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1036,7 +1158,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; 
GFX9-LABEL: name: test_load_local_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1088,7 +1219,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 3) @@ -1160,7 +1300,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -1174,21 +1323,24 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], 
[[OR8]](s32), 64 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-LABEL: name: test_load_local_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1249,7 +1401,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -1263,21 +1424,24 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: 
[[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; CI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1365,7 +1529,20 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + 
; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_local_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1417,9 +1594,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -1431,18 +1617,21 @@ body: | ; VI: 
[[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; GFX9-LABEL: name: test_load_local_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1494,9 +1683,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP 
[[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -1508,18 +1706,21 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3) @@ -1645,144 +1846,210 @@ body: | ; SI-LABEL: name: test_load_local_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] 
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-LABEL: name: test_load_local_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP 
[[COPY]], [[C5]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: 
[[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_local_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; GFX9-LABEL: name: test_load_local_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: 
[[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 3) @@ -1854,7 +2121,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -1868,21 +2144,24 @@ body: | ; SI: 
[[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-LABEL: name: test_load_local_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -1943,7 +2222,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -1957,21 +2245,24 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR 
[[AND10]], [[TRUNC11]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; CI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -2059,7 +2350,20 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-DS128: 
[[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_local_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -2111,9 +2415,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -2125,18 +2438,21 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + 
; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) ; GFX9-LABEL: name: test_load_local_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -2188,9 +2504,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -2202,18 +2527,21 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; GFX9: 
[[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 + ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3) @@ -2285,7 +2613,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -2307,34 +2644,42 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-LABEL: name: test_load_local_s128_align16 @@ -2396,7 +2741,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -2418,34 +2772,42 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = 
COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: 
[[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align16 @@ -2562,7 +2924,24 @@ body: | ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_local_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -2614,9 +2993,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: 
[[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -2636,27 +3024,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL 
[[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_load_local_s128_align16 @@ -2709,9 +3105,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -2731,27 +3136,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: 
[[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(p3) = COPY $vgpr0 @@ -2862,167 +3275,257 @@ body: | ; SI-LABEL: name: test_load_local_s128_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[GEP2:%[0-9]+]]:_(p3) 
= G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; SI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C2]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) 
:: (load 2, addrspace 3) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-LABEL: name: test_load_local_s128_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C2]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s64) 
= G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; CI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD3]](s32) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 ; CI-DS128: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]] + ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[COPY4]], [[C7]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]] + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_local_s128_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C2]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s64) 
= G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C2]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(p3) = COPY $vgpr0 @@ -3095,7 +3598,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = 
G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -3117,34 +3629,42 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: 
[[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-LABEL: name: test_load_local_s128_align1 @@ -3206,7 +3726,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], 
[[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -3228,34 +3757,42 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY 
[[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align1 @@ -3372,7 +3909,24 @@ body: | ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = 
G_SHL [[AND15]], [[COPY15]](s32) ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32) + ; CI-DS128: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32) + ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32) + ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_local_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -3424,9 +3978,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -3446,27 +4009,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_load_local_s128_align1 @@ -3519,9 +4090,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - 
; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -3541,27 +4121,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] 
; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(p3) = COPY $vgpr0 @@ -3640,92 +4228,142 @@ body: | ; SI-LABEL: name: test_load_local_p1_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, 
addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_local_p1_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) 
= G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_local_p1_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = 
COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_local_p1_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load 8, align 2, addrspace 3) 
@@ -3796,7 +4434,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_local_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -3856,7 +4503,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align1 ; 
CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -3916,7 +4572,16 @@ body: | ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_local_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -3968,7 +4633,16 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = 
G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_local_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -4020,7 +4694,16 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 3) @@ -4067,53 +4750,83 @@ body: | ; SI-LABEL: name: test_load_local_p3_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p3) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) 
= COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_local_p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p3) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-DS128-LABEL: name: test_load_local_p3_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(p3) 
= G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](p3) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_local_p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p3) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_local_p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p3) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load 4, align 2, addrspace 3) $vgpr0 = COPY %1 @@ -4155,8 +4868,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p3) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_local_p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4187,8 +4905,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p3) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-DS128-LABEL: name: test_load_local_p3_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4219,8 +4942,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](p3) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_local_p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4247,8 +4975,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: 
$vgpr0 = COPY [[MV]](p3) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_local_p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4275,8 +5008,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p3) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load 4, align 1, addrspace 3) $vgpr0 = COPY %1 @@ -4322,53 +5060,83 @@ body: | ; SI-LABEL: name: test_load_local_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_local_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-DS128-LABEL: name: test_load_local_p5_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], 
[[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](p5) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_local_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; 
GFX9-LABEL: name: test_load_local_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p3) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 3) $vgpr0 = COPY %1 @@ -4410,8 +5178,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_local_p5_align1 ; CI: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4442,8 +5215,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-DS128-LABEL: name: test_load_local_p5_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4474,8 +5252,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: $vgpr0 = COPY [[MV]](p5) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_local_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4502,8 +5285,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_local_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) @@ -4530,8 +5318,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p3) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 3) $vgpr0 = COPY %1 @@ -6446,9 +7239,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6461,19 +7258,22 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6505,9 +7305,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6520,19 +7324,22 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s32_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6564,9 +7371,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6579,19 +7390,22 @@ body: | ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT3]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6619,9 +7433,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6633,16 +7451,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: 
[[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6670,9 +7491,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6684,16 +7509,19 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 3) @@ -6737,9 +7565,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6752,18 +7584,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: 
[[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -6777,19 +7612,22 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: 
test_load_local_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6822,9 +7660,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6837,18 +7679,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -6862,19 +7707,22 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) 
= G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-LABEL: name: test_load_local_v3s32_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6907,9 +7755,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -6922,18 +7774,21 @@ body: | ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[COPY5]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI-DS128: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -6947,19 +7802,22 @@ body: | ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR 
[[AND8]], [[TRUNC9]] + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-DS128: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-DS128: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_local_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -6987,9 +7845,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: 
[[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7001,17 +7863,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -7023,16 +7888,19 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_local_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -7060,9 +7928,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; 
GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7074,17 +7946,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + 
; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -7096,16 +7971,19 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] 
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 3) @@ -7321,181 +8199,251 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; 
SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C3]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C3]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD5]](s32) - ; CI-DS128: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-DS128: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-DS128: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C3]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-DS128: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, 
addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C3]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; 
GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C1]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C3]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), 
[[TRUNC7]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR3]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(p3) = COPY $vgpr0 @@ -7540,9 +8488,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7555,19 +8507,22 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -7581,19 +8536,22 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR 
[[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C6]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C7]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; SI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -7606,19 +8564,22 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align1 @@ -7652,9 +8613,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7667,19 +8632,22 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -7693,19 +8661,22 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C6]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], 
[[C7]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; CI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -7718,19 +8689,22 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) 
= G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align1 @@ -7764,9 +8738,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7779,18 +8757,21 @@ body: | ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC6]], [[C3]] ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI-DS128: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -7804,20 +8785,23 @@ body: | ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-DS128: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY 
[[LOAD11]](s32) ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-DS128: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-DS128: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -7830,19 +8814,22 @@ body: | ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-DS128: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-DS128: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD14]](s32) ; CI-DS128: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-DS128: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI-DS128: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -7870,9 +8857,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7884,18 +8875,21 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: 
[[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -7907,16 +8901,19 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C5]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C6]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; VI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -7928,16 +8925,19 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: 
[[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align1 @@ -7966,9 +8966,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -7980,18 +8984,21 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -8003,16 +9010,19 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C5]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C6]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP12]](p3) :: (load 1, addrspace 3) @@ -8024,16 +9034,19 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(p3) = COPY $vgpr0 @@ -8411,7 +9424,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), 
[[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -8433,34 +9455,42 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: 
[[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR10]](s32), [[OR11]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_local_v2s64_align16 @@ -8522,7 +9552,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -8544,34 +9583,42 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: 
[[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-DS128-LABEL: name: test_load_local_v2s64_align16 @@ -8633,7 +9680,16 @@ body: | ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-DS128: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -8655,34 +9711,42 @@ body: | ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; 
CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-DS128: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-DS128: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-DS128: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-DS128: 
[[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-DS128: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-DS128: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI-DS128: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI-DS128: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_local_v2s64_align16 @@ -8735,9 +9799,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: 
[[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -8757,27 +9830,35 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] 
; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_local_v2s64_align16 @@ -8830,9 +9911,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), 
[[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -8852,27 +9942,35 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR 
[[AND10]], [[SHL7]] ; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16), [[OR6]](s16), [[OR7]](s16) + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p3) = COPY $vgpr0 @@ -9396,9 +10494,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -9411,19 +10513,22 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -9455,9 +10560,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -9470,19 +10579,22 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -9514,9 +10626,13 @@ body: | ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) + ; CI-DS128: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -9529,19 +10645,22 @@ body: | ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI-DS128: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], 
[[COPY6]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -9569,9 +10688,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -9583,16 +10706,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) 
= G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 @@ -9620,9 +10746,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; 
GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 1, addrspace 3) @@ -9634,16 +10764,19 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 3) @@ -9659,97 +10792,137 @@ body: | ; SI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, 
addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI: 
$vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = 
G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 3) @@ -10019,7 +11192,16 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT3]], [[C10]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -10033,23 +11215,26 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; SI: 
[[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 + ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C11]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; SI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -10070,34 +11255,42 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: 
[[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C9]] - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) + ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C9]] - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16) + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) + ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL13]] + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32) ; SI: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C8]](s32) ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p3) :: (load 1, addrspace 3) ; SI: [[GEP20:%[0-9]+]]:_(p3) = G_GEP [[GEP19]], [[C]](s32) @@ 
-10111,21 +11304,24 @@ body: | ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C9]] - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) - ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) + ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C9]] - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) - ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) + ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) + ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; SI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; SI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; SI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 + ; SI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; SI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY 
[[INSERT3]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) @@ -10189,7 +11385,16 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) @@ -10203,23 +11408,26 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C9]] - 
; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C10]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; CI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) + ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 + ; CI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C11]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; CI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -10240,34 +11448,42 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C9]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C9]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C9]] - ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16) + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: 
[[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C10]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL13]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C10]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32) ; CI: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C8]](s32) ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p3) :: (load 1, addrspace 3) ; CI: [[GEP20:%[0-9]+]]:_(p3) = G_GEP [[GEP19]], [[C]](s32) @@ -10281,21 +11497,24 @@ body: | ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C9]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C9]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) 
+ ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C10]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; CI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; CI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; CI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 + ; CI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; CI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) @@ -10386,9 +11605,22 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) - ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C13]](s32) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C13]](s32) + ; CI-DS128: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CI-DS128: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CI-DS128: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C13]](s32) + ; CI-DS128: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CI-DS128: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; 
CI-DS128: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C13]](s32) + ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) + ; CI-DS128: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-DS128: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C14]](s32) ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; CI-DS128: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -10417,50 +11649,62 @@ body: | ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI-DS128: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C12]] - ; CI-DS128: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI-DS128: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI-DS128: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI-DS128: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI-DS128: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C11]] ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI-DS128: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C12]] - ; CI-DS128: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI-DS128: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI-DS128: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], 
[[TRUNC15]] ; CI-DS128: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI-DS128: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C11]] ; CI-DS128: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI-DS128: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C12]] - ; CI-DS128: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) - ; CI-DS128: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI-DS128: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) + ; CI-DS128: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CI-DS128: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI-DS128: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI-DS128: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C11]] ; CI-DS128: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI-DS128: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C12]] - ; CI-DS128: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) - ; CI-DS128: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI-DS128: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI-DS128: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) + ; CI-DS128: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI-DS128: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] ; CI-DS128: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD20]](s32) ; CI-DS128: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C11]] ; CI-DS128: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI-DS128: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C12]] - ; CI-DS128: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) - ; CI-DS128: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI-DS128: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; 
CI-DS128: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) + ; CI-DS128: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI-DS128: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI-DS128: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI-DS128: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C11]] ; CI-DS128: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CI-DS128: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI-DS128: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C12]] - ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) - ; CI-DS128: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI-DS128: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI-DS128: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16), [[OR10]](s16), [[OR11]](s16) + ; CI-DS128: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) + ; CI-DS128: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CI-DS128: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI-DS128: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI-DS128: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI-DS128: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C13]](s32) + ; CI-DS128: [[OR15:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL15]] + ; CI-DS128: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CI-DS128: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI-DS128: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C13]](s32) + ; CI-DS128: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CI-DS128: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI-DS128: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CI-DS128: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C13]](s32) + ; CI-DS128: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI-DS128: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR15]](s32), [[OR16]](s32), [[OR17]](s32) ; CI-DS128: [[COPY25:%[0-9]+]]:_(s96) = 
COPY [[MV]](s96) ; CI-DS128: [[COPY26:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) @@ -10515,9 +11759,18 @@ body: | ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -10529,20 +11782,23 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC10]], [[C7]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) + ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C11]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; VI: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -10562,28 +11818,36 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; VI: 
[[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16) - ; VI: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C9]](s32) + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL13]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32) + ; VI: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C10]](s32) ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p3) :: (load 1, addrspace 3) ; VI: [[GEP20:%[0-9]+]]:_(p3) = G_GEP [[GEP19]], [[C]](s32) ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[GEP20]](p3) :: (load 1, addrspace 3) @@ -10595,18 +11859,21 @@ body: | ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL15]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) + ; VI: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; VI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; VI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 + ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) ; VI: 
[[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -10661,9 +11928,18 @@ body: | ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]] ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16) ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C9]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 1, addrspace 3) @@ -10675,20 +11951,23 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: 
[[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C10]](s32) + ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR8]](s32), 64 + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C11]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP12:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p3) :: (load 1, addrspace 3) @@ -10708,28 +11987,36 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C7]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C8]](s16) + ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL11]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C7]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C7]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16), [[OR8]](s16), [[OR9]](s16) - ; GFX9: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C9]](s32) + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C8]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL12]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32) + ; GFX9: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL13]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: 
[[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C9]](s32) + ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32) + ; GFX9: [[GEP19:%[0-9]+]]:_(p3) = G_GEP [[GEP11]], [[C10]](s32) ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p3) :: (load 1, addrspace 3) ; GFX9: [[GEP20:%[0-9]+]]:_(p3) = G_GEP [[GEP19]], [[C]](s32) ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[GEP20]](p3) :: (load 1, addrspace 3) @@ -10741,18 +12028,21 @@ body: | ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C7]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C7]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C8]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL15]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C7]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C7]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) + ; GFX9: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C8]](s16) + ; GFX9: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C9]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; GFX9: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 + ; GFX9: 
[[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR17]](s32), 64 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -10774,276 +12064,398 @@ body: | ; SI-LABEL: name: test_extload_local_v2s96_from_24_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) 
= COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) + ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, 
addrspace 3) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C1]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C2]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16) - ; SI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C3]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) + ; SI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C5]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[GEP10:%[0-9]+]]:_(p3) = G_GEP [[GEP9]], [[C]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p3) :: (load 2, addrspace 3) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND 
[[COPY11]], [[C3]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; SI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; SI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 - ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; SI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 + ; SI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) + ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; CI-LABEL: name: test_extload_local_v2s96_from_24_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; CI: 
[[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) + ; CI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C1]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C2]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16) - ; CI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C3]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) 
= G_OR [[AND8]], [[SHL4]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) + ; CI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C5]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[GEP10:%[0-9]+]]:_(p3) = G_GEP [[GEP9]], [[C]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p3) :: (load 2, addrspace 3) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) + ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] + ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; CI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; CI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; CI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 - ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; CI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 + ; CI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) + ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CI-DS128: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[AND1]], [[C6]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI-DS128: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] + ; CI-DS128: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] + ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI-DS128: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-DS128: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-DS128: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C7]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; CI-DS128: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C1]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-DS128: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C2]](s32) ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; CI-DS128: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C3]](s32) ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p3) :: (load 2, 
addrspace 3) - ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-DS128: [[GEP10:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C4]](s32) ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; CI-DS128: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) - ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) - ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI-DS128: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] + ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI-DS128: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] + ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C5]] + ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI-DS128: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] + ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CI-DS128: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; CI-DS128: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; 
CI-DS128: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; VI-LABEL: name: test_extload_local_v2s96_from_24_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = 
G_AND [[COPY4]], [[C3]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) + ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C1]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP7]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C2]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16) - ; VI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C3]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) + ; VI: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C5]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[GEP10:%[0-9]+]]:_(p3) = G_GEP [[GEP9]], [[C]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p3) :: (load 2, addrspace 3) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[AND11]], [[C4]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; VI: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 + ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) + ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p3) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[MV]](s64), 0 - ; GFX9: 
[[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[MV1]](s32), 64 - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C4]](s32) + ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR2]](s32), 64 + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP5:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[GEP7:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C1]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[GEP8:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C2]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C4]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR 
[[AND8]], [[SHL4]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32) + ; GFX9: [[GEP9:%[0-9]+]]:_(p3) = G_GEP [[GEP5]], [[C5]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[GEP10:%[0-9]+]]:_(p3) = G_GEP [[GEP9]], [[C]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p3) :: (load 2, addrspace 3) - ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; GFX9: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV2]](s64), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[MV3]](s32), 64 - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[MV1]](s64), 0 + ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[OR5]](s32), 64 + ; GFX9: [[COPY13:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) + ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index d996cfac4bedf..ed819e4658b26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -328,43 +328,63 @@ body: | ; SI-LABEL: name: test_load_private_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-LABEL: name: test_load_private_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_private_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_private_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; 
GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 5) $vgpr0 = COPY %1 @@ -406,8 +426,12 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-LABEL: name: test_load_private_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -438,8 +462,12 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + 
; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_private_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -466,8 +494,12 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_private_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -494,8 +526,12 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: $vgpr0 = COPY [[OR2]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 5) $vgpr0 = COPY %1 @@ -664,79 +700,111 @@ body: | ; SI-LABEL: name: test_load_private_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: 
test_load_private_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], 
[[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_private_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_private_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: 
[[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -778,9 +846,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -793,20 +865,23 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_private_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -837,9 +912,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: 
[[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -852,20 +931,23 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_private_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -892,9 +974,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -906,17 +992,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_private_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -943,9 +1032,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) 
= G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -957,17 +1050,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV2]](s64) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -1010,9 +1106,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: 
[[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -1025,18 +1125,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -1050,20 +1153,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-LABEL: name: test_load_private_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -1095,9 +1201,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -1110,18 +1220,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: 
[[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -1135,20 +1248,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: 
[[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_private_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -1175,9 +1291,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP 
[[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -1189,17 +1309,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -1211,17 +1334,20 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL 
[[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_private_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -1248,9 +1374,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -1262,17 +1392,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, 
addrspace 56) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -1284,17 +1417,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 56) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1419,111 +1555,155 @@ body: | ; SI-LABEL: name: test_load_private_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, 
addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = 
G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-LABEL: name: test_load_private_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = 
G_AND [[COPY6]], [[C1]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_private_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], 
[[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_private_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: 
[[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: 
[[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1566,9 +1746,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -1581,18 +1765,21 @@ body: | ; SI: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -1606,20 +1793,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; CI-LABEL: name: test_load_private_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -1651,9 +1841,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP 
[[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -1666,18 +1860,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -1691,20 +1888,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: 
[[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; VI-LABEL: name: test_load_private_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -1731,9 +1931,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -1745,17 +1949,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], 
[[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -1767,17 +1974,20 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) 
+ ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) ; GFX9-LABEL: name: test_load_private_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -1804,9 +2014,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -1818,17 +2032,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], 
[[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -1840,17 +2057,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV3]](s96) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1893,9 +2113,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -1908,18 +2132,21 @@ body: | ; 
SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -1933,20 +2160,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -1959,20 +2189,23 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = 
G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-LABEL: name: test_load_private_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -2004,9 +2237,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - 
; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -2019,18 +2256,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: 
[[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -2044,20 +2284,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; 
CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -2070,20 +2313,23 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_private_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -2110,9 +2356,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -2124,17 +2374,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -2146,17 +2399,20 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; VI: 
[[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -2168,17 +2424,20 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_private_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) @@ -2205,9 +2464,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -2219,17 +2482,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -2241,17 +2507,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: 
[[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -2263,17 +2532,20 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = 
G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2422,143 +2694,199 @@ body: | ; SI-LABEL: name: test_load_private_s128_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: 
[[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], 
[[C]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-LABEL: name: test_load_private_s128_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 
+ ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_private_s128_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = 
COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_private_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: 
[[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: 
[[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2601,9 +2929,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -2616,18 +2948,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -2641,20 +2976,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] 
+ ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -2667,20 +3005,23 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = 
G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; CI-LABEL: name: test_load_private_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -2712,9 +3053,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -2727,18 +3072,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -2752,20 +3100,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -2778,20 +3129,23 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; VI-LABEL: name: test_load_private_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -2818,9 +3172,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: 
[[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -2832,17 +3190,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -2854,17 +3215,20 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], 
[[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -2876,17 +3240,20 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: 
[[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) ; GFX9-LABEL: name: test_load_private_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -2913,9 +3280,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -2927,17 +3298,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: 
[[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -2949,17 +3323,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -2971,17 +3348,20 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV4]](s128) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3082,79 +3462,111 @@ body: | ; SI-LABEL: name: test_load_private_p1_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align2 ; VI: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = 
G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load 8, align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -3196,9 +3608,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -3211,20 +3627,23 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3255,9 +3674,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; 
CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -3270,20 +3693,23 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3310,9 +3736,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -3324,17 +3754,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3361,9 +3794,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -3375,17 +3812,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV2]](p1) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -3427,43 +3867,67 @@ body: | ; SI-LABEL: name: test_load_private_p3_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: 
[[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p3) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_private_p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p3) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_private_p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p3) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_private_p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p3) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + 
; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load 4, align 2, addrspace 5) $vgpr0 = COPY %1 @@ -3505,8 +3969,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p3) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_private_p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3537,8 +4006,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p3) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_private_p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3565,8 +4039,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: 
[[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p3) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_private_p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3593,8 +4072,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p3) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load 4, align 1, addrspace 5) $vgpr0 = COPY %1 @@ -3636,43 +4120,67 @@ body: | ; SI-LABEL: name: test_load_private_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_private_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_private_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_private_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = 
G_INTTOPTR [[OR]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 5) $vgpr0 = COPY %1 @@ -3714,8 +4222,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: $vgpr0 = COPY [[MV]](p5) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_private_p5_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3746,8 +4259,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: $vgpr0 = COPY [[MV]](p5) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_private_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3774,8 +4292,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL 
[[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: $vgpr0 = COPY [[MV]](p5) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_private_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) @@ -3802,8 +4325,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(p5) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: $vgpr0 = COPY [[MV]](p5) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 5) $vgpr0 = COPY %1 @@ -5489,9 +6017,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -5504,19 +6036,22 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5548,9 +6083,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -5563,19 +6102,22 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: 
[[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5603,9 +6145,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: 
[[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -5617,16 +6163,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5654,9 +6203,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = 
G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -5668,16 +6221,19 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; 
GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 5) @@ -5721,9 +6277,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -5736,18 +6296,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -5761,19 +6324,22 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC 
[[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: test_load_private_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5806,9 +6372,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -5821,18 +6391,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY 
[[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -5846,19 +6419,22 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] 
+ ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_private_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5886,9 +6462,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + 
; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -5900,17 +6480,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, 
addrspace 56) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -5922,16 +6505,19 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_private_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5959,9 +6545,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES 
[[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -5973,17 +6563,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -5995,16 +6588,19 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY 
[[BUILD_VECTOR]](<3 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 56) @@ -6103,9 +6699,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -6118,18 +6718,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], 
[[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -6143,20 +6746,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p5) 
= G_GEP [[COPY]], [[C7]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -6169,19 +6775,22 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), 
[[MV3]](s32) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -6214,9 +6823,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -6229,18 +6842,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) 
= G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -6254,20 +6870,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -6280,19 +6899,22 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -6320,9 +6942,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -6334,17 +6960,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 56) @@ -6356,17 +6985,20 @@ body: | 
; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -6378,16 +7010,19 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: 
[[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -6415,9 +7050,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: 
[[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 56) @@ -6429,17 +7068,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP8]](p5) :: (load 1, addrspace 56) @@ -6451,17 +7093,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 56) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 56) @@ -6473,16 +7118,19 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: 
[[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) @@ -6632,142 +7280,198 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES 
[[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD5]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: 
[[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; CI: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: 
test_load_private_v4s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) 
+ ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = 
G_OR [[AND6]], [[SHL3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; 
GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 2, addrspace 5) @@ -6811,9 +7515,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -6826,18 +7534,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -6851,20 +7562,23 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -6877,19 +7591,22 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: 
[[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -6922,9 +7639,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) 
= G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -6937,18 +7658,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; CI: 
[[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -6962,20 +7686,23 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 
1, addrspace 5) @@ -6988,19 +7715,22 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -7028,9 +7758,13 @@ body: | ; VI: 
[[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7042,17 +7776,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: 
[[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -7064,17 +7801,20 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: 
[[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7086,16 +7826,19 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -7123,9 +7866,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], 
[[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7137,17 +7884,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], 
[[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -7159,17 +7909,20 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7181,16 +7934,19 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = 
G_LOAD %0 :: (load 16, align 1, addrspace 5) @@ -7615,9 +8371,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7630,19 +8390,22 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: 
[[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -7656,19 +8419,22 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: 
[[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C6]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C7]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7681,20 +8447,23 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV3]](s32), [[MV4]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = 
G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_private_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -7727,9 +8496,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7742,19 +8515,22 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: 
[[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -7768,19 +8544,22 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR 
[[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C6]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C7]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7793,20 +8572,23 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV3]](s32), [[MV4]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_private_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -7834,9 +8616,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7848,18 +8634,21 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), 
[[OR5]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -7871,16 +8660,19 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C5]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C6]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7892,17 +8684,20 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; 
VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV3]](s32), [[MV4]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_private_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -7930,9 +8725,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: 
[[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -7944,18 +8743,21 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; 
GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -7967,16 +8769,19 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C5]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C6]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, 
addrspace 5) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -7988,17 +8793,20 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV3]](s32), [[MV4]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV5]](s64) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 5) @@ -8715,9 +9523,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[AND3]], [[C4]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -8730,19 +9542,22 @@ body: | ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), 
[[OR3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -8774,9 +9589,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -8789,19 +9608,22 @@ body: | ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[COPY4]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY6]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -8829,9 +9651,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -8843,16 +9669,19 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; VI: 
$vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -8880,9 +9709,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -8894,16 +9727,19 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: 
[[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 5) @@ -8919,78 +9755,110 @@ body: | ; SI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], 
[[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; 
CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) 
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 5) @@ -9203,9 +10071,13 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] 
- ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -9218,18 +10090,21 @@ body: | ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; 
SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -9243,21 +10118,24 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; SI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; SI: 
[[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; SI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -9270,19 +10148,22 @@ body: | ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; SI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; SI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; SI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C6]](s32) + ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], 
[[TRUNC15]] + ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; SI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C7]](s32) ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p5) :: (load 1, addrspace 5) ; SI: [[GEP16:%[0-9]+]]:_(p5) = G_GEP [[GEP15]], [[C]](s32) ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p5) :: (load 1, addrspace 5) @@ -9295,18 +10176,21 @@ body: | ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C5]] - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) + ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; SI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C3]] ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C5]] - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; SI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16) + ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) + ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL 
[[ZEXT9]], [[C6]](s32) + ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] ; SI: [[GEP19:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C4]](s32) ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p5) :: (load 1, addrspace 5) ; SI: [[GEP20:%[0-9]+]]:_(p5) = G_GEP [[GEP19]], [[C]](s32) @@ -9320,21 +10204,24 @@ body: | ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]] - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) - ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) + ; SI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; SI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; SI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; SI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C3]] ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]] - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) - ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; SI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) - ; SI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) + ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) + ; SI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; SI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C6]](s32) + ; SI: 
[[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) ; CI-LABEL: name: test_extload_private_v2s96_from_24_align1 @@ -9368,9 +10255,13 @@ body: | ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -9383,18 +10274,21 @@ body: | ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] - ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C5]] - ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] + ; CI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) @@ -9408,21 +10302,24 @@ body: | ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C5]] - ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] + ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY9]](s32) + ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]] - ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], 
[[COPY11]](s32) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; CI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY11]](s32) + ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] + ; CI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C6]](s32) + ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; CI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -9435,19 +10332,22 @@ body: | ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) ; CI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C5]] - ; CI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) - ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] + ; CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY13]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC13]] ; CI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; CI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) ; CI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C5]] - ; CI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) - ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] - ; CI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; CI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C6]](s32) + ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY15]](s32) + ; CI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[TRUNC15]] + ; CI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C6]](s32) + ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C7]](s32) ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p5) :: (load 1, addrspace 5) ; CI: [[GEP16:%[0-9]+]]:_(p5) = G_GEP [[GEP15]], [[C]](s32) ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p5) :: (load 1, addrspace 5) @@ -9460,18 +10360,21 @@ body: | ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) ; CI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C5]] - ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) - ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] + ; CI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY17]](s32) + ; CI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC17]] ; CI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; CI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C3]] ; CI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LOAD19]](s32) ; CI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C5]] - ; 
CI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) - ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] - ; CI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16) + ; CI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY19]](s32) + ; CI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[TRUNC19]] + ; CI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C6]](s32) + ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] ; CI: [[GEP19:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C4]](s32) ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p5) :: (load 1, addrspace 5) ; CI: [[GEP20:%[0-9]+]]:_(p5) = G_GEP [[GEP19]], [[C]](s32) @@ -9485,21 +10388,24 @@ body: | ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) ; CI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]] - ; CI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) - ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] + ; CI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY21]](s32) + ; CI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[TRUNC21]] ; CI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; CI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C3]] ; CI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD23]](s32) ; CI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]] - ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) - ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] - ; CI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), 
[[OR11]](s16) - ; CI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) + ; CI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY23]](s32) + ; CI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; CI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[TRUNC23]] + ; CI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; CI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C6]](s32) + ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; CI: [[COPY25:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) ; VI-LABEL: name: test_extload_private_v2s96_from_24_align1 @@ -9528,9 +10434,13 @@ body: | ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -9542,17 
+10452,20 @@ body: | ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -9564,18 +10477,21 @@ body: | ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL 
[[AND9]], [[C4]](s16) + ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; VI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; VI: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -9587,16 +10503,19 @@ body: | ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; VI: 
[[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; VI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR6]](s16), [[OR7]](s16) - ; VI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C5]](s32) + ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; VI: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C6]](s32) ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p5) :: (load 1, addrspace 5) ; VI: [[GEP16:%[0-9]+]]:_(p5) = G_GEP [[GEP15]], [[C]](s32) ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p5) :: (load 1, addrspace 5) @@ -9608,16 +10527,19 @@ body: | ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C3]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C3]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C4]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C4]](s16) + ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; VI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C3]] ; VI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; VI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C3]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C4]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] - ; VI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16) - ; VI: [[GEP19:%[0-9]+]]:_(p5) = G_GEP 
[[GEP11]], [[C6]](s32) + ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C4]](s16) + ; VI: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C5]](s32) + ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; VI: [[GEP19:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C7]](s32) ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p5) :: (load 1, addrspace 5) ; VI: [[GEP20:%[0-9]+]]:_(p5) = G_GEP [[GEP19]], [[C]](s32) ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[GEP20]](p5) :: (load 1, addrspace 5) @@ -9629,18 +10551,21 @@ body: | ; VI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C3]] ; VI: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; VI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C3]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C4]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C4]](s16) + ; VI: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL15]] ; VI: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; VI: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C3]] ; VI: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; VI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C3]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C4]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; VI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) - ; VI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) + ; VI: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C4]](s16) + ; VI: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL16]] + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; 
VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C5]](s32) + ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align1 @@ -9669,9 +10594,13 @@ body: | ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 1, addrspace 5) @@ -9683,17 +10612,20 @@ body: | ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR3]](s16) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C6]](s32) + ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL4]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 1, addrspace 5) @@ -9705,18 +10637,21 @@ body: | ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C3]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C3]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) - ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C3]] ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C3]] - ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) - ; GFX9: [[OR5:%[0-9]+]]:_(s16) 
= G_OR [[AND10]], [[SHL5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR4]](s16), [[OR5]](s16) - ; GFX9: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C7]](s32) + ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP11:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C8]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP12:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p5) :: (load 1, addrspace 5) @@ -9728,16 +10663,19 @@ body: | ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C3]] ; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C3]] - ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) - ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL9]] ; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C3]] ; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32) ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C3]] - ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) - ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; GFX9: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES 
[[OR6]](s16), [[OR7]](s16) - ; GFX9: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C5]](s32) + ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL10]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32) + ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; GFX9: [[GEP15:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C6]](s32) ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[GEP15]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP16:%[0-9]+]]:_(p5) = G_GEP [[GEP15]], [[C]](s32) ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[GEP16]](p5) :: (load 1, addrspace 5) @@ -9749,16 +10687,19 @@ body: | ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C3]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) ; GFX9: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C3]] - ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C4]](s16) - ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL8]] + ; GFX9: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C4]](s16) + ; GFX9: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] ; GFX9: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD18]](s32) ; GFX9: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C3]] ; GFX9: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD19]](s32) ; GFX9: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C3]] - ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C4]](s16) - ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL9]] - ; GFX9: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR8]](s16), [[OR9]](s16) - ; GFX9: [[GEP19:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C6]](s32) + ; GFX9: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C4]](s16) + ; GFX9: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; GFX9: 
[[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C5]](s32) + ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL14]] + ; GFX9: [[GEP19:%[0-9]+]]:_(p5) = G_GEP [[GEP11]], [[C7]](s32) ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[GEP19]](p5) :: (load 1, addrspace 5) ; GFX9: [[GEP20:%[0-9]+]]:_(p5) = G_GEP [[GEP19]], [[C]](s32) ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[GEP20]](p5) :: (load 1, addrspace 5) @@ -9770,18 +10711,21 @@ body: | ; GFX9: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C3]] ; GFX9: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD21]](s32) ; GFX9: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C3]] - ; GFX9: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C4]](s16) - ; GFX9: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL10]] + ; GFX9: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C4]](s16) + ; GFX9: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL15]] ; GFX9: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD22]](s32) ; GFX9: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C3]] ; GFX9: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD23]](s32) ; GFX9: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C3]] - ; GFX9: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C4]](s16) - ; GFX9: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL11]] - ; GFX9: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR10]](s16), [[OR11]](s16) - ; GFX9: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) + ; GFX9: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C4]](s16) + ; GFX9: [[OR16:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL16]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR16]](s16) + ; GFX9: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C5]](s32) + ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR11]](s32), [[OR14]](s32), 
[[OR17]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 @@ -9801,215 +10745,295 @@ body: | ; SI-LABEL: name: test_extload_private_v2s96_from_24_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP 
[[COPY]], [[C2]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; SI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[MV4:%[0-9]+]]:_(s32) = 
G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C1]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C3]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; SI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s16), [[TRUNC9]](s16) - ; SI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C2]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; SI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C4]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[GEP10:%[0-9]+]]:_(p5) = G_GEP [[GEP9]], [[C]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p5) :: (load 2, addrspace 5) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; SI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) - ; SI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; SI: [[COPY2:%[0-9]+]]:_(s96) = 
COPY [[MV7]](s96) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; SI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; CI-LABEL: name: test_extload_private_v2s96_from_24_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; CI: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; CI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), 
[[OR1]](s32), [[OR2]](s32) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C1]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C3]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; CI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s16), [[TRUNC9]](s16) - ; CI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C2]](s32) + ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; CI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C4]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p5) :: (load 2, addrspace 5) - ; CI: 
[[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI: [[GEP10:%[0-9]+]]:_(p5) = G_GEP [[GEP9]], [[C]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p5) :: (load 2, addrspace 5) - ; CI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; CI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) - ; CI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) - ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; CI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; CI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; VI-LABEL: name: test_extload_private_v2s96_from_24_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + 
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; VI: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; VI: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C1]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C3]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; VI: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s16), [[TRUNC9]](s16) - ; VI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C2]](s32) 
+ ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; VI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; VI: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C4]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[GEP10:%[0-9]+]]:_(p5) = G_GEP [[GEP9]], [[C]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p5) :: (load 2, addrspace 5) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; VI: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) - ; VI: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; VI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; VI: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[GEP2:%[0-9]+]]:_(p5) = G_GEP [[GEP1]], [[C]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: 
[[GEP3:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C4]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[GEP4:%[0-9]+]]:_(p5) = G_GEP [[GEP3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) - ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s16), [[TRUNC5]](s16) - ; GFX9: [[MV3:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32), [[MV2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C3]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[GEP5:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C5]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[GEP6:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[MV4:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) 
= G_SHL [[AND7]], [[C2]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GFX9: [[GEP7:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C3]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[GEP8:%[0-9]+]]:_(p5) = G_GEP [[GEP7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) - ; GFX9: [[MV5:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s16), [[TRUNC9]](s16) - ; GFX9: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C2]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) + ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) + ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C1]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GFX9: [[GEP9:%[0-9]+]]:_(p5) = G_GEP [[GEP5]], [[C4]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[GEP10:%[0-9]+]]:_(p5) = G_GEP [[GEP9]], [[C]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p5) :: (load 2, addrspace 5) - ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) - ; GFX9: [[MV6:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC10]](s16), [[TRUNC11]](s16) - ; GFX9: [[MV7:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV4]](s32), [[MV5]](s32), [[MV6]](s32) - ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[MV3]](s96) - ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[MV7]](s96) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) + ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) + ; GFX9: 
[[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GFX9: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s96) = COPY [[MV]](s96) + ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[MV1]](s96) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) + ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index 00538251f091b..2a981be56c41d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -55,7 +55,16 @@ body: | ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C3]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -131,8 +140,12 @@ body: | ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[AND3]], [[C4]](s32) ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[MV]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C6]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; CHECK: $vgpr0 = COPY [[COPY3]](s32) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 @@ -169,8 +182,12 @@ body: | ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) - ; CHECK: $vgpr0 = COPY [[MV]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C7]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK: $vgpr0 = COPY [[OR2]](s32) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 %2:_(s8) = G_CONSTANT i8 2 @@ -205,11 +222,21 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CHECK: $vgpr1_vgpr2 = COPY [[MV]](s64) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -278,8 +305,12 @@ body: | ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC7]] - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR5]](s16) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s24) = G_TRUNC [[MV]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s24) = G_TRUNC [[OR6]](s32) ; CHECK: S_NOP 0, implicit [[TRUNC8]](s24) %0:_(s4) = G_CONSTANT i4 0 %1:_(s4) = G_CONSTANT i4 1 @@ -346,8 +377,12 @@ body: | ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C10]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC7]] - ; CHECK: 
[[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR5]](s16) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s28) = G_TRUNC [[MV]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C11]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s28) = G_TRUNC [[OR6]](s32) ; CHECK: S_NOP 0, implicit [[TRUNC8]](s28) %0:_(s4) = G_CONSTANT i4 0 %1:_(s4) = G_CONSTANT i4 1 @@ -442,7 +477,20 @@ body: | ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY10]](s32) ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C14]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C14]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C14]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]] + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32), [[OR8]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 @@ -466,13 +514,20 @@ name: test_merge_s96_s16_s16_s16_s16_s16_s16 body: | bb.0: ; CHECK-LABEL: 
name: test_merge_s96_s16_s16_s16_s16_s16_s16 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C2]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL1]] + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL2]] + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s16) = G_CONSTANT i16 0 %1:_(s16) = G_CONSTANT i16 1 @@ -531,7 +586,16 @@ body: | ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C10]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL4]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C10]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CHECK: [[TRUNC8:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64) ; CHECK: S_NOP 0, implicit [[TRUNC8]](s56) %0:_(s8) = G_CONSTANT i8 0 @@ -706,12 +770,80 @@ name: test_merge_p3_s16_s16 body: | bb.0: ; CHECK-LABEL: name: test_merge_p3_s16_s16 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[MV:%[0-9]+]]:_(p3) = G_MERGE_VALUES [[C]](s16), [[C1]](s16) - ; CHECK: $vgpr0 = COPY [[MV]](p3) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(s16) = G_CONSTANT i16 0 %1:_(s16) = G_CONSTANT i16 1 %2:_(p3) = G_MERGE_VALUES %0, %1 $vgpr0 = COPY %2 ... 
+ +--- +name: test_merge_s32_s16_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_merge_s32_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: $vgpr0 = COPY [[OR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s32) = G_MERGE_VALUES %2, %3 + $vgpr0 = COPY %4 +... + +--- +name: test_merge_s48_s16_s16_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: test_merge_s48_s16_s16_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[OR]](s32), [[OR1]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY6]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + + %6:_(s48) = G_MERGE_VALUES %3, %4, %5 + %7:_(s64) = G_ANYEXT %6 + $vgpr0_vgpr1 = COPY %7 +... From 0b2ea91d6d162c3d5af824729ff3f925d163f8ac Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:07:19 +0000 Subject: [PATCH 144/254] AMDGPU/GlobalISel: Use S_MOV_B64 for inline constants This hides some defects in SIFoldOperands when the immediates are split. llvm-svn: 373943 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 47 +++++++++++-------- .../GlobalISel/inst-select-constant.mir | 19 ++++---- .../GlobalISel/inst-select-load-smrd.mir | 4 +- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 28ebbd9101c5e..aa165d4ce21e9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1472,31 +1472,38 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - DebugLoc DL = I.getDebugLoc(); - const TargetRegisterClass *RC = IsSgpr ? 
&AMDGPU::SReg_32_XM0RegClass : - &AMDGPU::VGPR_32RegClass; - Register LoReg = MRI->createVirtualRegister(RC); - Register HiReg = MRI->createVirtualRegister(RC); - const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); - - BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); - - BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); - - const MachineInstr *RS = - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + const DebugLoc &DL = I.getDebugLoc(); + + APInt Imm(Size, I.getOperand(1).getImm()); + + MachineInstr *ResInst; + if (IsSgpr && TII.isInlineConstant(Imm)) { + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) + .addImm(I.getOperand(1).getImm()); + } else { + const TargetRegisterClass *RC = IsSgpr ? + &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass; + Register LoReg = MRI->createVirtualRegister(RC); + Register HiReg = MRI->createVirtualRegister(RC); + + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) + .addImm(Imm.trunc(32).getZExtValue()); + + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) + .addImm(Imm.ashr(32).getZExtValue()); + + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + } // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(RS->getOperand(0), *MRI); + TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); if (!DstRC) return true; return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir index b97f9d384aa91..2acf1aeb5a7c4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -5,6 +5,7 @@ name: constant legalized: true regBankSelected: true +tracksRegLiveness: true body: | @@ -25,28 +26,30 @@ body: | ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216 %4:sgpr(s32) = G_FCONSTANT float 1.0 + ; GCN: %5:sreg_64_xexec = S_MOV_B64 4607182418800017408 + %5:sgpr(s64) = G_FCONSTANT double 1.0 + ; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 - ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248 + ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1076101120 ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1 - %5:sgpr(s64) = G_FCONSTANT double 1.0 + %6:sgpr(s64) = G_FCONSTANT double 10.0 ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1 - %6:vgpr(s32) = G_CONSTANT i32 1 + %7:vgpr(s32) = G_CONSTANT i32 1 ; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 ; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1 ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1 - %7:vgpr(s64) = G_CONSTANT i64 4294967296 + %8:vgpr(s64) = G_CONSTANT i64 4294967296 ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216 - %8:vgpr(s32) = G_FCONSTANT float 1.0 + %9:vgpr(s32) = G_FCONSTANT float 1.0 ; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 ; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248 ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1 - %9:vgpr(s64) = G_FCONSTANT double 1.0 + %10:vgpr(s64) = G_FCONSTANT double 1.0 - S_ENDPGM 0, implicit %2, implicit %4, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9 + S_ENDPGM 0, implicit %2, implicit %4, implicit %5, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9, implicit %10 ... 
- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 367c92b5243e4..30cb3f032d76c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -190,9 +190,7 @@ body: | # Test a load of an offset from a constant base address # GCN-LABEL: name: constant_address_positive{{$}} -# GCN: %4:sreg_32_xm0 = S_MOV_B32 44 -# GCN: %5:sreg_32_xm0 = S_MOV_B32 0 -# GCN: %0:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1 +# GCN: %0:sreg_64 = S_MOV_B64 44 # VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4) # SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4) From 09ec6918bc737bd3193e3cb1f7b65611ee85facb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:10:43 +0000 Subject: [PATCH 145/254] AMDGPU/GlobalISel: Select VALU G_AMDGPU_FFBH_U32 llvm-svn: 373944 --- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 2 +- .../GlobalISel/inst-select-amdgpu-ffbh-u32.mir | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index bea0c7bd080d2..e1e35c9ba13af 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -235,7 +235,7 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; -defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; +defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir index cefd876daa82e..026b6648af831 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-ffbh-u32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- @@ -36,9 +36,9 @@ body: | ; CHECK-LABEL: name: ffbh_u32_s32_v_v ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_FFBH_U32_]](s32) + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 S_ENDPGM 0, implicit %1 @@ -58,9 +58,9 @@ body: | ; CHECK-LABEL: name: ffbh_u32_v_s ; CHECK: liveins: $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32) - ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_FFBH_U32_]](s32) + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_AMDGPU_FFBH_U32 %0 S_ENDPGM 0, implicit %1 From 1237aa2996c200ca5ebb448f5145fc8ce7f1646a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:10:44 +0000 Subject: [PATCH 146/254] AMDGPU/GlobalISel: Fix selection of 16-bit shifts llvm-svn: 373945 --- 
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 9 +- .../GlobalISel/inst-select-ashr.s16.mir | 368 +++++++++++++----- .../GlobalISel/inst-select-lshr.s16.mir | 368 +++++++++++++----- .../AMDGPU/GlobalISel/inst-select-shl.s16.mir | 368 +++++++++++++----- 4 files changed, 816 insertions(+), 297 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index e5f215fbcd719..15604aa13533b 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -752,19 +752,22 @@ multiclass Bits_OpsRev_i16_Pats ; def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)) + !if(!eq(PreservesHI16,1), (ClearHI16 (inst VSrc_b32:$src1, VSrc_b32:$src0)), + (inst VSrc_b32:$src1, VSrc_b32:$src0)) >; def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)), + !if(!eq(PreservesHI16,1), (ClearHI16 (inst VSrc_b32:$src1, VSrc_b32:$src0)), + (inst VSrc_b32:$src1, VSrc_b32:$src0)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index 1a90e609f7bd3..e1ce7872e9355 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = 
G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: ashr_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) +--- +name: ashr_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: 
[[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... --- -name: ashr_s16_ss +name: ashr_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: ashr_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit 
[[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: ashr_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: ashr_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: ashr_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: ashr_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: ashr_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: ashr_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_ss + + ; GFX8-LABEL: name: ashr_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_ss + ; GFX9-LABEL: name: ashr_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_ss + ; GFX10-LABEL: name: ashr_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... 
--- -name: ashr_s16_sv +name: ashr_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: ashr_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_sv + ; GFX8-LABEL: name: ashr_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_sv + ; GFX9-LABEL: name: ashr_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_sv + ; GFX10-LABEL: name: ashr_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... 
--- -name: ashr_s16_vs +name: ashr_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: ashr_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: ashr_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 ... 
--- -name: ashr_s16_vv +name: ashr_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: ashr_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: ashr_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_vv + ; GFX9-LABEL: name: ashr_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_vv + ; GFX10-LABEL: name: ashr_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) %0:vgpr(s32) = 
COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_ASHR %2, %1 S_ENDPGM 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index 2a2f600c5b7c6..65e9af7719665 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_LSHR %2:sgpr, %3:sgpr(s16) (in function: lshr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: lshr_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) -# ERR-GFX910-NEXT: 
remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) +--- +name: lshr_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... 
--- -name: lshr_s16_ss +name: lshr_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: lshr_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: lshr_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: lshr_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: lshr_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: lshr_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: lshr_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: lshr_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_ss + + ; GFX8-LABEL: name: lshr_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_ss + ; GFX9-LABEL: name: lshr_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_ss + ; GFX10-LABEL: name: lshr_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... 
--- -name: lshr_s16_sv +name: lshr_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: lshr_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_sv + ; GFX8-LABEL: name: lshr_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_sv + ; GFX9-LABEL: name: lshr_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_sv + ; GFX10-LABEL: name: lshr_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... 
--- -name: lshr_s16_vs +name: lshr_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: lshr_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: lshr_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 ... 
--- -name: lshr_s16_vv +name: lshr_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: lshr_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: lshr_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_vv + ; GFX9-LABEL: name: lshr_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_vv + ; GFX10-LABEL: name: lshr_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) %0:vgpr(s32) = 
COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_LSHR %2, %1 S_ENDPGM 0, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index d41cdee39040f..07b4f9a3dcd91 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_SHL %2:sgpr, %3:sgpr(s16) (in function: shl_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: shl_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: 
%3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) +--- +name: shl_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... 
--- -name: shl_s16_ss +name: shl_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: shl_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: shl_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: shl_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: shl_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: shl_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: shl_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: shl_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_ss + + ; GFX8-LABEL: name: shl_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_ss + ; GFX9-LABEL: name: shl_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_ss + ; GFX10-LABEL: name: shl_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... 
--- -name: shl_s16_sv +name: shl_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: shl_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_sv + ; GFX8-LABEL: name: shl_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_sv + ; GFX9-LABEL: name: shl_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_sv + ; GFX10-LABEL: name: shl_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... 
--- -name: shl_s16_vs +name: shl_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: shl_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: shl_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: 
[[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 ... 
--- -name: shl_s16_vv +name: shl_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: shl_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: shl_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_vv + ; GFX9-LABEL: name: shl_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_vv + ; GFX10-LABEL: name: shl_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) %0:vgpr(s32) = COPY $vgpr0 - 
%1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_SHL %2, %1 S_ENDPGM 0, implicit %3 From 4bcdcad91bc6548790c95e9f9c3ca062515518ea Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:13:27 +0000 Subject: [PATCH 147/254] GlobalISel: Partially implement lower for G_INSERT llvm-svn: 373946 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 41 +++++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +- .../AMDGPU/GlobalISel/legalize-insert.mir | 154 +++++++++++++++++- 4 files changed, 193 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 710a9f81d3ba2..bf60319996a79 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -231,6 +231,7 @@ class LegalizerHelper { LegalizeResult lowerShuffleVector(MachineInstr &MI); LegalizeResult lowerDynStackAlloc(MachineInstr &MI); LegalizeResult lowerExtract(MachineInstr &MI); + LegalizeResult lowerInsert(MachineInstr &MI); private: MachineRegisterInfo &MRI; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 861c22d605ab2..684b99d8bae3f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2249,6 +2249,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerDynStackAlloc(MI); case G_EXTRACT: return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -4134,3 +4136,42 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { return UnableToLegalize; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = 
MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ce724e360ae5d..dfb8ed55d6b57 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -912,13 +912,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1; // FIXME: Doesn't handle extract of illegal sizes. - auto &Builder = getActionDefinitionsBuilder(Op); - - // FIXME: Cleanup when G_INSERT lowering implemented. - if (Op == G_EXTRACT) - Builder.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))); - - Builder + getActionDefinitionsBuilder(Op) + .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + // FIXME: Multiples of 16 should not be legal. 
.legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 066932ec4807a..506dcfe0e8a44 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -762,15 +762,48 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 %3:_(<2 x s16>) = G_INSERT %0, %2, 0 $vgpr0 = COPY %3 ... 
+ +--- +name: test_insert_v2s16_s16_offset1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_v2s16_s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65535 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(<2 x s16>) = G_INSERT %0, %2, 1 + $vgpr0 = COPY %3 +... 
--- name: test_insert_v2s16_s16_offset16 body: | @@ -780,9 +813,17 @@ body: | ; CHECK-LABEL: name: test_insert_v2s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK: $vgpr0 = COPY [[INSERT]](<2 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %1 @@ -1247,3 +1288,104 @@ body: | %3:_(s64) = G_INSERT %0, %2, 48 $vgpr0_vgpr1 = COPY %3 ... 
+--- +name: test_insert_s32_s16_offset0 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 0 + $vgpr0 = COPY %3 +... + +--- +name: test_insert_s32_s16_offset1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65535 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 1 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_insert_s32_s16_offset8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65281 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 8 + $vgpr0 = COPY %3 +... + +--- +name: test_insert_s32_s16_offset16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_insert_s32_s16_offset16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(s32) = G_INSERT %1, %2, 16 + $vgpr0 = COPY %3 +... 
From 538b73b7976c83e0224239b14aa1354e5d57138d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 19:16:26 +0000 Subject: [PATCH 148/254] AMDGPU/GlobalISel: Handle more G_INSERT cases Start manually writing a table to get the subreg index. TableGen should probably generate this, but I'm not sure what it looks like in the arbitrary case where subregisters are allowed to not fully cover the super-registers. llvm-svn: 373947 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 44 +---- llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 66 ++++++-- llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h | 2 +- .../AMDGPU/GlobalISel/inst-select-insert.mir | 150 +++++++++++++++--- 4 files changed, 185 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index aa165d4ce21e9..afdeacc42910c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -555,48 +555,6 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { return false; } -// FIXME: TableGen should generate something to make this manageable for all -// register classes. At a minimum we could use the opposite of -// composeSubRegIndices and go up from the base 32-bit subreg. 
-static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI, - unsigned Size, unsigned Offset) { - switch (Size) { - case 32: - return TRI.getSubRegFromChannel(Offset / 32); - case 64: { - switch (Offset) { - case 0: - return AMDGPU::sub0_sub1; - case 32: - return AMDGPU::sub1_sub2; - case 64: - return AMDGPU::sub2_sub3; - case 96: - return AMDGPU::sub4_sub5; - case 128: - return AMDGPU::sub5_sub6; - case 160: - return AMDGPU::sub7_sub8; - // FIXME: Missing cases up to 1024 bits - default: - return AMDGPU::NoSubRegister; - } - } - case 96: { - switch (Offset) { - case 0: - return AMDGPU::sub0_sub1_sub2; - case 32: - return AMDGPU::sub1_sub2_sub3; - case 64: - return AMDGPU::sub2_sub3_sub4; - } - } - default: - return AMDGPU::NoSubRegister; - } -} - bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -612,7 +570,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { if (Offset % 32 != 0) return false; - unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset); + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); if (SubReg == AMDGPU::NoSubRegister) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 7cffdf1a4dcf9..9806e6b0714f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. 
//===----------------------------------------------------------------------===// -unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) { - static const unsigned SubRegs[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, - AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, - AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, - AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24, - AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29, - AMDGPU::sub30, AMDGPU::sub31 - }; - - assert(Channel < array_lengthof(SubRegs)); - return SubRegs[Channel]; +// Table of NumRegs sized pieces at every 32-bit offset. +static const uint16_t SubRegFromChannelTable[][32] = { + { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, + AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, + AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 + }, + { + AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4, + AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, + AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, + AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16, + AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, + AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, + AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28, + AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, 
AMDGPU::sub30_sub31, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, + AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, + AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, + AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, + AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, + AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, + AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, + AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, + AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, + AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, + AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, + AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, + AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, + AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, + AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + } +}; + +// FIXME: TableGen should generate something to make this manageable for all 
+// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. +unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) { + const unsigned NumRegIndex = NumRegs - 1; + + assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && + "Not implemented"); + assert(Channel < array_lengthof(SubRegFromChannelTable[0])); + return SubRegFromChannelTable[NumRegIndex][Channel]; } void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 3453a8c1b0b39..9e713ca804a11 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) - static unsigned getSubRegFromChannel(unsigned Channel); + static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); void reserveRegisterTuples(BitVector &, unsigned Reg) const; }; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 3cd1b463b5790..c120c96174128 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -303,41 +303,46 @@ body: | --- -name: insert_s_s256_s_s64_96 +name: insert_s_v256_v_s64_96 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9 + ; CHECK-LABEL: name: insert_s_v256_v_s64_96 + ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], 
[[COPY1]], %subreg.sub3_sub4 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s64) = COPY $vgpr8_vgpr9 + %2:vgpr(s256) = G_INSERT %0, %1, 96 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_s_s256_s_s64_128 legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 - ; CHECK-LABEL: name: insert_s_s256_s_s64_96 + ; CHECK-LABEL: name: insert_s_s256_s_s64_128 ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - %1:sgpr(s64) = COPY $sgpr8_sgpr9 - %2:sgpr(s256) = G_INSERT %0, %1, 96 + %1:sgpr(s64) = COPY $sgpr4_sgpr5 + %2:sgpr(s256) = G_INSERT %0, %1, 128 S_ENDPGM 0, implicit %2 ... # --- -# name: insert_s_s256_s_s64_128 -# legalized: true -# regBankSelected: true - -# body: | -# bb.0: -# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9 -# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 -# %1:sgpr(s64) = COPY $sgpr4_sgpr5 -# %2:sgpr(s256) = G_INSERT %0, %1, 128 -# S_ENDPGM 0, implicit %2 -# ... - -# --- - # name: insert_s_s256_s_s64_160 # legalized: true # regBankSelected: true @@ -450,3 +455,108 @@ body: | %2:sgpr(s160) = G_INSERT %0, %1, 64 S_ENDPGM 0, implicit %2 ... 
+ +--- + +name: insert_s_s256_s_s128_0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11 + + ; CHECK-LABEL: name: insert_s_s256_s_s128_0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + %2:sgpr(s256) = G_INSERT %0, %1, 0 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s256_v_s128_32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: insert_v_s256_v_s128_32 + ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(s256) = G_INSERT %0, %1, 32 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_v_s256_v_s128_64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: insert_v_s256_v_s128_64 + ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(s256) = G_INSERT %0, %1, 64 + S_ENDPGM 0, implicit %2 +... + +--- + +name: insert_v_s256_v_s128_96 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: insert_v_s256_v_s128_96 + ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(s256) = G_INSERT %0, %1, 96 + S_ENDPGM 0, implicit %2 +... 
+ +--- + +name: insert_v_s256_v_s128_128 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: insert_v_s256_v_s128_128 + ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7 + ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] + %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(s256) = G_INSERT %0, %1, 128 + S_ENDPGM 0, implicit %2 +... From 0d19662a6a8f7c3bbabf3b095d6fbf76cf9aa376 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 19:17:02 +0000 Subject: [PATCH 149/254] gn build: try to make system-libs.windows.test pass llvm-svn: 373948 --- llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn index 0048343312215..58334c8caf637 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn @@ -40,7 +40,7 @@ write_cmake_config("BuildVariables.inc") { # lib/Support/Windows/Path.inc. 
# advapi32 required for CryptAcquireContextW in # lib/Support/Windows/Path.inc - system_libs = "psapi.lib shell32.lib ole32.lib uuid.lib advapi32" + system_libs = "psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib" } else { system_libs += "-lm" if (host_os == "linux") { From be52ff95063aa3a5f6784d1c3479511d333c7fd6 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 7 Oct 2019 19:22:04 +0000 Subject: [PATCH 150/254] [libc++abi] Introduce a LIBCXXABI_LIBRARY_VERSION option That option controls the 'VERSION' attribute of the libc++abi shared library, which in turn controls the name of the actual dylib being produced. llvm-svn: 373949 --- libcxxabi/CMakeLists.txt | 5 +++++ libcxxabi/src/CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 8ca169065f48e..fafb777493647 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -77,6 +77,11 @@ set(LIBCXXABI_TARGET_TRIPLE "" CACHE STRING "Target triple for cross compiling." set(LIBCXXABI_GCC_TOOLCHAIN "" CACHE PATH "GCC toolchain for cross compiling.") set(LIBCXXABI_SYSROOT "" CACHE PATH "Sysroot for cross compiling.") set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.") +set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING +"Version of libc++abi. This will be reflected in the name of the shared \ +library produced. For example, -DLIBCXXABI_LIBRARY_VERSION=x.y will \ +result in the library being named libc++abi.x.y.dylib, along with the \ +usual symlinks pointing to that.") # Default to building a shared library so that the default options still test # the libc++abi that is being built. 
There are two problems with testing a diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 0bb164d12ee4e..b38973b0b1794 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -176,7 +176,7 @@ if (LIBCXXABI_ENABLE_SHARED) SOVERSION "1" VERSION - "1.0" + "${LIBCXXABI_LIBRARY_VERSION}" DEFINE_SYMBOL "") From 6bdfe3aeba8bb000571c453669817e9f33e24909 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Mon, 7 Oct 2019 19:41:53 +0000 Subject: [PATCH 151/254] Fix for expanding __pragmas in macro arguments Summary: Avoid parsing __pragma into an annotation token when macro arguments are pre-expanded. This is what clang currently does when parsing _Pragmas. Fixes https://bugs.llvm.org/show_bug.cgi?id=41128, where clang crashed when trying to get the length of an annotation token. Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68114 llvm-svn: 373950 --- clang/lib/Lex/Pragma.cpp | 79 +++++++++++++--------- clang/test/Preprocessor/pragma_microsoft.c | 12 +++- 2 files changed, 58 insertions(+), 33 deletions(-) diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index 61e424d49618e..79953804b5d3e 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -121,6 +121,40 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP, // Preprocessor Pragma Directive Handling. //===----------------------------------------------------------------------===// +namespace { +// TokenCollector provides the option to collect tokens that were "read" +// and return them to the stream to be read later. +// Currently used when reading _Pragma/__pragma directives. 
+struct TokenCollector { + Preprocessor &Self; + bool Collect; + SmallVector Tokens; + Token &Tok; + + void lex() { + if (Collect) + Tokens.push_back(Tok); + Self.Lex(Tok); + } + + void revert() { + assert(Collect && "did not collect tokens"); + assert(!Tokens.empty() && "collected unexpected number of tokens"); + + // Push the ( "string" ) tokens into the token stream. + auto Toks = std::make_unique(Tokens.size()); + std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); + Toks[Tokens.size() - 1] = Tok; + Self.EnterTokenStream(std::move(Toks), Tokens.size(), + /*DisableMacroExpansion*/ true, + /*IsReinject*/ true); + + // ... and return the pragma token unchanged. + Tok = *Tokens.begin(); + } +}; +} // namespace + /// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the /// rest of the pragma, passing it to the registered pragma handlers. void Preprocessor::HandlePragmaDirective(PragmaIntroducer Introducer) { @@ -166,35 +200,6 @@ void Preprocessor::Handle_Pragma(Token &Tok) { // In Case #2, we check the syntax now, but then put the tokens back into the // token stream for later consumption. - struct TokenCollector { - Preprocessor &Self; - bool Collect; - SmallVector Tokens; - Token &Tok; - - void lex() { - if (Collect) - Tokens.push_back(Tok); - Self.Lex(Tok); - } - - void revert() { - assert(Collect && "did not collect tokens"); - assert(!Tokens.empty() && "collected unexpected number of tokens"); - - // Push the ( "string" ) tokens into the token stream. - auto Toks = std::make_unique(Tokens.size()); - std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); - Toks[Tokens.size() - 1] = Tok; - Self.EnterTokenStream(std::move(Toks), Tokens.size(), - /*DisableMacroExpansion*/ true, - /*IsReinject*/ true); - - // ... and return the _Pragma token unchanged. - Tok = *Tokens.begin(); - } - }; - TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok}; // Remember the pragma token location. 
@@ -328,11 +333,15 @@ void Preprocessor::Handle_Pragma(Token &Tok) { /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text /// is not enclosed within a string literal. void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { + // During macro pre-expansion, check the syntax now but put the tokens back + // into the token stream for later consumption. Same as Handle_Pragma. + TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok}; + // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); // Read the '('. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::l_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); return; @@ -341,14 +350,14 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { // Get the tokens enclosed within the __pragma(), as well as the final ')'. SmallVector PragmaToks; int NumParens = 0; - Lex(Tok); + Toks.lex(); while (Tok.isNot(tok::eof)) { PragmaToks.push_back(Tok); if (Tok.is(tok::l_paren)) NumParens++; else if (Tok.is(tok::r_paren) && NumParens-- == 0) break; - Lex(Tok); + Toks.lex(); } if (Tok.is(tok::eof)) { @@ -356,6 +365,12 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) { return; } + // If we're expanding a macro argument, put the tokens back. + if (InMacroArgPreExpansion) { + Toks.revert(); + return; + } + PragmaToks.front().setFlag(Token::LeadingSpace); // Replace the ')' with an EOD to mark the end of the pragma. diff --git a/clang/test/Preprocessor/pragma_microsoft.c b/clang/test/Preprocessor/pragma_microsoft.c index 9d62d01838749..020292a4b2566 100644 --- a/clang/test/Preprocessor/pragma_microsoft.c +++ b/clang/test/Preprocessor/pragma_microsoft.c @@ -51,6 +51,8 @@ __pragma(comment(linker," bar=" BAR)) __pragma(warning(pop)); \ } +#define PRAGMA_IN_ARGS(p) p + void f() { __pragma() // expected-warning{{unknown pragma ignored}} @@ -64,8 +66,16 @@ void f() // CHECK: #pragma warning(disable: 10000) // CHECK: ; 1 + (2 > 3) ? 
4 : 5; // CHECK: #pragma warning(pop) -} + // Check that macro arguments can contain __pragma. + PRAGMA_IN_ARGS(MACRO_WITH__PRAGMA) // expected-warning {{lower precedence}} \ + // expected-note 2 {{place parentheses}} \ + // expected-warning {{expression result unused}} +// CHECK: #pragma warning(push) +// CHECK: #pragma warning(disable: 10000) +// CHECK: ; 1 + (2 > 3) ? 4 : 5; +// CHECK: #pragma warning(pop) +} // This should include macro_arg_directive even though the include // is looking for test.h This allows us to assign to "n" From 0db7b6a44c47078f54633a06d59ac60f39e5bc0e Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 7 Oct 2019 19:54:19 +0000 Subject: [PATCH 152/254] Attempt to fix a few clang-tidy tests on Windows, see PR43593. llvm-svn: 373951 --- clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp | 2 +- .../test/clang-tidy/bugprone-unhandled-self-assignment.cpp | 2 +- .../test/clang-tidy/cppcoreguidelines-init-variables.cpp | 2 +- clang-tools-extra/test/clang-tidy/misc-redundant-expression.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp b/clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp index d8b8870b9b4a7..af16f5105552b 100644 --- a/clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp +++ b/clang-tools-extra/test/clang-tidy/bugprone-branch-clone.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s bugprone-branch-clone %t +// RUN: %check_clang_tidy %s bugprone-branch-clone %t -- -- -fno-delayed-template-parsing void test_basic1(int in, int &out) { if (in > 77) diff --git a/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment.cpp b/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment.cpp index c2385d039b49b..49bb5314f9ebe 100644 --- a/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment.cpp +++ b/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment.cpp @@ -1,4 +1,4 @@ -// RUN: 
%check_clang_tidy %s bugprone-unhandled-self-assignment %t +// RUN: %check_clang_tidy %s bugprone-unhandled-self-assignment %t -- -- -fno-delayed-template-parsing namespace std { diff --git a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-init-variables.cpp b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-init-variables.cpp index 893c1d2877983..d43e44808a49a 100644 --- a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-init-variables.cpp +++ b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-init-variables.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-init-variables %t +// RUN: %check_clang_tidy %s cppcoreguidelines-init-variables %t -- -- -fno-delayed-template-parsing // Ensure that function declarations are not changed. void some_func(int x, double d, bool b, const char *p); diff --git a/clang-tools-extra/test/clang-tidy/misc-redundant-expression.cpp b/clang-tools-extra/test/clang-tidy/misc-redundant-expression.cpp index 35a0f407b27f7..f6b47eb79fb9c 100644 --- a/clang-tools-extra/test/clang-tidy/misc-redundant-expression.cpp +++ b/clang-tools-extra/test/clang-tidy/misc-redundant-expression.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s misc-redundant-expression %t +// RUN: %check_clang_tidy %s misc-redundant-expression %t -- -- -fno-delayed-template-parsing typedef __INT64_TYPE__ I64; From d457f7e080259ffe0d0123446a2a2e50a99e0b7e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 7 Oct 2019 19:57:40 +0000 Subject: [PATCH 153/254] [OPENMP]Fix caonical->canonical, NFC. Fixed typo. 
llvm-svn: 373952 --- clang/include/clang/AST/StmtOpenMP.h | 4 ++-- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index ef69158d61f94..9d58c1d793a82 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1092,7 +1092,7 @@ class OMPLoopDirective : public OMPExecutableDirective { Body = For->getBody(); } else { assert(isa(Body) && - "Expected caonical for loop or range-based for loop."); + "Expected canonical for loop or range-based for loop."); Body = cast(Body)->getBody(); } for (unsigned Cnt = 1; Cnt < CollapsedNum; ++Cnt) { @@ -1101,7 +1101,7 @@ class OMPLoopDirective : public OMPExecutableDirective { Body = For->getBody(); } else { assert(isa(Body) && - "Expected caonical for loop or range-based for loop."); + "Expected canonical for loop or range-based for loop."); Body = cast(Body)->getBody(); } } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c70244d779405..0c264bb371c43 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -151,7 +151,7 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { Body = For->getBody(); } else { assert(isa(Body) && - "Expected caonical for loop or range-based for loop."); + "Expected canonical for loop or range-based for loop."); auto *CXXFor = cast(Body); if (const Stmt *Init = CXXFor->getInit()) CGF.EmitStmt(Init); @@ -1377,7 +1377,7 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, Body = For->getBody(); } else { assert(isa(Body) && - "Expected caonical for loop or range-based for loop."); + "Expected canonical for loop or range-based for loop."); auto *CXXFor = cast(Body); EmitStmt(CXXFor->getLoopVarStmt()); Body = CXXFor->getBody(); From 8b6dcc1d8ce672713f0c0c1853f936c5d7b5d6ea Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Mon, 7 Oct 
2019 20:26:49 +0000 Subject: [PATCH 154/254] [gdb-remote] process properly effective uid Summary: Someone wrote SetEffectiveSetEffectiveGroupID instead of SetEffectiveUserID. After this fix, the android process list can show user names, e.g. ``` PID PARENT USER GROUP EFF USER EFF GROUP TRIPLE ARGUMENTS ====== ====== ========== ========== ========== ========== ============================== ============================ 529 1 root 0 root 0 /sbin/ueventd ``` Reviewers: labath,clayborg,aadsm,xiaobai Subscribers: llvm-svn: 373953 --- .../gdb_remote_client/TestPlatformClient.py | 8 ++++++-- .../Process/gdb-remote/GDBRemoteCommunicationClient.cpp | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py index d0087770256ec..03d29c57a5aa0 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/TestPlatformClient.py @@ -8,13 +8,13 @@ class TestPlatformClient(GDBRemoteTestBase): - def test_process_list_with_all_users(self): + def test_process_list(self): """Test connecting to a remote linux platform""" class MyResponder(MockGDBServerResponder): def qfProcessInfo(self, packet): if "all_users:1" in packet: - return "pid:10;ppid:1;uid:1;gid:1;euid:1;egid:1;name:" + binascii.hexlify("/a/process") + ";args:" + return "pid:10;ppid:1;uid:2;gid:3;euid:4;egid:5;name:" + binascii.hexlify("/a/process") + ";args:" else: return "E04" @@ -28,6 +28,10 @@ def qfProcessInfo(self, packet): self.assertTrue(self.dbg.GetSelectedPlatform().IsConnected()) self.expect("platform process list -x", startstr="1 matching process was found", endstr="process" + os.linesep) + self.expect("platform process list -xv", + substrs=[ + "PID PARENT USER GROUP EFF USER EFF GROUP", + "10 1 2 3 4 
5"]) self.expect("platform process list", error="error: no processes were found on the \"remote-linux\" platform") finally: diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 7cb9ce0f52f33..f6ef45792459a 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -1906,7 +1906,7 @@ bool GDBRemoteCommunicationClient::DecodeProcessInfoResponse( } else if (name.equals("euid")) { uint32_t uid = UINT32_MAX; value.getAsInteger(0, uid); - process_info.SetEffectiveGroupID(uid); + process_info.SetEffectiveUserID(uid); } else if (name.equals("gid")) { uint32_t gid = UINT32_MAX; value.getAsInteger(0, gid); From 369407fc52238ba2d11628975e345d766ce24fee Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 7 Oct 2019 20:31:22 +0000 Subject: [PATCH 155/254] [MachO] Shuffle some things around in ParseSymtab (NFC) llvm-svn: 373954 --- .../source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 15f221e58fda9..d109dc1c4905c 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -3685,7 +3685,7 @@ size_t ObjectFileMachO::ParseSymtab() { if (!ParseNList(nlist_data, nlist_data_offset, nlist_byte_size, nlist)) break; - SymbolType type = eSymbolTypeInvalid; + const char *symbol_name_non_abi_mangled = nullptr; const char *symbol_name = nullptr; if (have_strtab_data) { @@ -3710,17 +3710,17 @@ size_t ObjectFileMachO::ParseSymtab() { str_error)) symbol_name = memory_symbol_name.c_str(); } - const char *symbol_name_non_abi_mangled = nullptr; + SymbolType type = eSymbolTypeInvalid; SectionSP 
symbol_section; lldb::addr_t symbol_byte_size = 0; bool add_nlist = true; bool is_gsym = false; - bool is_debug = ((nlist.n_type & N_STAB) != 0); bool demangled_is_synthesized = false; bool set_value = true; - assert(sym_idx < num_syms); + const bool is_debug = ((nlist.n_type & N_STAB) != 0); + assert(sym_idx < num_syms); sym[sym_idx].SetDebug(is_debug); if (is_debug) { @@ -4073,7 +4073,6 @@ size_t ObjectFileMachO::ParseSymtab() { break; } } else { - // uint8_t n_pext = N_PEXT & nlist.n_type; uint8_t n_type = N_TYPE & nlist.n_type; sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0); From 4db091754b401832b8748b6aa431015771bf0908 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 7 Oct 2019 20:31:28 +0000 Subject: [PATCH 156/254] [test] Rename `Modules` to `ObjectFile` (NFC) llvm-svn: 373955 --- .../Breakpad/Inputs/bad-module-id-1.syms | 0 .../Breakpad/Inputs/bad-module-id-2.syms | 0 .../Breakpad/Inputs/bad-module-id-3.syms | 0 .../Breakpad/Inputs/discontiguous-sections.syms | 0 .../Breakpad/Inputs/identification-linux.syms | 0 .../Inputs/identification-macosx-arm64e.syms | 0 .../Breakpad/Inputs/identification-macosx.syms | 0 .../Breakpad/Inputs/identification-windows.syms | 0 .../Breakpad/Inputs/sections-trailing-func.syms | 0 .../Breakpad/Inputs/sections.syms | 0 .../Breakpad/Inputs/uuid-matching-mac.syms | 0 .../Breakpad/Inputs/uuid-matching-mac.yaml | 0 .../Breakpad/breakpad-identification.test | 0 .../Breakpad/discontiguous-sections.test | 0 .../{Modules => ObjectFile}/Breakpad/lit.local.cfg | 0 .../Breakpad/sections-trailing-func.test | 0 .../{Modules => ObjectFile}/Breakpad/sections.test | 0 .../Breakpad/uuid-matching-mac.test | 0 .../ELF/Inputs/PT_LOAD-overlap-section.elf | Bin .../ELF/Inputs/minidebuginfo-main.c | 0 .../ELF/Inputs/netbsd-amd64.core | Bin .../{Modules => ObjectFile}/ELF/PT_LOAD-empty.yaml | 0 .../ELF/PT_LOAD-overlap-PT_INTERP.yaml | 0 .../ELF/PT_LOAD-overlap-PT_TLS.yaml | 0 .../ELF/PT_LOAD-overlap-section.yaml | 0 
.../ELF/PT_LOAD-overlap.yaml | 0 lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD.yaml | 0 .../ELF/PT_TLS-overlap-PT_LOAD.yaml | 0 .../ELF/aarch64-relocations.yaml | 0 .../{Modules => ObjectFile}/ELF/base-address.yaml | 0 .../lit/{Modules => ObjectFile}/ELF/basic-info.yaml | 0 .../{Modules => ObjectFile}/ELF/build-id-case.yaml | 0 .../ELF/compressed-sections.yaml | 0 .../ELF/duplicate-section.yaml | 0 .../{Modules => ObjectFile}/ELF/gnu-debuglink.yaml | 0 .../lit/{Modules => ObjectFile}/ELF/many-sections.s | 0 .../ELF/minidebuginfo-corrupt-xz.yaml | 0 .../ELF/minidebuginfo-find-symbols.yaml | 0 .../ELF/minidebuginfo-no-lzma.yaml | 0 .../ELF/minidebuginfo-set-and-hit-breakpoint.test | 0 .../ELF/netbsd-core-amd64.test | 0 .../ELF/netbsd-exec-8.99.30-amd64.yaml | 0 .../ELF/section-addresses.yaml | 0 .../ELF/section-overlap.yaml | 0 .../ELF/section-permissions.yaml | 0 .../ELF/section-types-edgecases.yaml | 0 .../{Modules => ObjectFile}/ELF/section-types.yaml | 0 .../{Modules => ObjectFile}/ELF/short-build-id.yaml | 0 .../MachO/lc_build_version.yaml | 0 .../MachO/lc_build_version_notools.yaml | 0 .../MachO/lc_version_min.yaml | 0 .../{Modules => ObjectFile}/MachO/subsections.yaml | 0 .../PECOFF/basic-info-arm.yaml | 0 .../PECOFF/basic-info-arm64.yaml | 0 .../{Modules => ObjectFile}/PECOFF/basic-info.yaml | 0 .../{Modules => ObjectFile}/PECOFF/dep-modules.yaml | 0 .../PECOFF/export-dllfunc.yaml | 0 .../{Modules => ObjectFile}/PECOFF/lit.local.cfg | 0 .../PECOFF/sections-names.yaml | 0 .../{Modules => ObjectFile}/PECOFF/subsections.yaml | 0 lldb/lit/{Modules => ObjectFile}/PECOFF/uuid.yaml | 0 lldb/lit/{Modules => ObjectFile}/lit.local.cfg | 0 62 files changed, 0 insertions(+), 0 deletions(-) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/bad-module-id-1.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/bad-module-id-2.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/bad-module-id-3.syms (100%) rename lldb/lit/{Modules => 
ObjectFile}/Breakpad/Inputs/discontiguous-sections.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/identification-linux.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/identification-macosx-arm64e.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/identification-macosx.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/identification-windows.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/sections-trailing-func.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/sections.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/uuid-matching-mac.syms (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/Inputs/uuid-matching-mac.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/breakpad-identification.test (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/discontiguous-sections.test (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/lit.local.cfg (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/sections-trailing-func.test (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/sections.test (100%) rename lldb/lit/{Modules => ObjectFile}/Breakpad/uuid-matching-mac.test (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/Inputs/PT_LOAD-overlap-section.elf (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/Inputs/minidebuginfo-main.c (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/Inputs/netbsd-amd64.core (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD-empty.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD-overlap-PT_INTERP.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD-overlap-PT_TLS.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD-overlap-section.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD-overlap.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/PT_LOAD.yaml (100%) rename lldb/lit/{Modules => 
ObjectFile}/ELF/PT_TLS-overlap-PT_LOAD.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/aarch64-relocations.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/base-address.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/basic-info.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/build-id-case.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/compressed-sections.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/duplicate-section.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/gnu-debuglink.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/many-sections.s (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/minidebuginfo-corrupt-xz.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/minidebuginfo-find-symbols.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/minidebuginfo-no-lzma.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/minidebuginfo-set-and-hit-breakpoint.test (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/netbsd-core-amd64.test (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/netbsd-exec-8.99.30-amd64.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/section-addresses.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/section-overlap.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/section-permissions.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/section-types-edgecases.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/section-types.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/ELF/short-build-id.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/MachO/lc_build_version.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/MachO/lc_build_version_notools.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/MachO/lc_version_min.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/MachO/subsections.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/basic-info-arm.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/basic-info-arm64.yaml 
(100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/basic-info.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/dep-modules.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/export-dllfunc.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/lit.local.cfg (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/sections-names.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/subsections.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/PECOFF/uuid.yaml (100%) rename lldb/lit/{Modules => ObjectFile}/lit.local.cfg (100%) diff --git a/lldb/lit/Modules/Breakpad/Inputs/bad-module-id-1.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-1.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/bad-module-id-1.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-1.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/bad-module-id-2.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-2.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/bad-module-id-2.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-2.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/bad-module-id-3.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-3.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/bad-module-id-3.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/bad-module-id-3.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/discontiguous-sections.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/discontiguous-sections.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/identification-linux.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/identification-linux.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/identification-linux.syms rename to 
lldb/lit/ObjectFile/Breakpad/Inputs/identification-linux.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/identification-macosx-arm64e.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/identification-macosx-arm64e.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/identification-macosx-arm64e.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/identification-macosx-arm64e.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/identification-macosx.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/identification-macosx.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/identification-macosx.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/identification-macosx.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/identification-windows.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/identification-windows.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/identification-windows.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/identification-windows.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/sections-trailing-func.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/sections-trailing-func.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/sections-trailing-func.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/sections-trailing-func.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/sections.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/sections.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/sections.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/sections.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/uuid-matching-mac.syms b/lldb/lit/ObjectFile/Breakpad/Inputs/uuid-matching-mac.syms similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/uuid-matching-mac.syms rename to lldb/lit/ObjectFile/Breakpad/Inputs/uuid-matching-mac.syms diff --git a/lldb/lit/Modules/Breakpad/Inputs/uuid-matching-mac.yaml b/lldb/lit/ObjectFile/Breakpad/Inputs/uuid-matching-mac.yaml 
similarity index 100% rename from lldb/lit/Modules/Breakpad/Inputs/uuid-matching-mac.yaml rename to lldb/lit/ObjectFile/Breakpad/Inputs/uuid-matching-mac.yaml diff --git a/lldb/lit/Modules/Breakpad/breakpad-identification.test b/lldb/lit/ObjectFile/Breakpad/breakpad-identification.test similarity index 100% rename from lldb/lit/Modules/Breakpad/breakpad-identification.test rename to lldb/lit/ObjectFile/Breakpad/breakpad-identification.test diff --git a/lldb/lit/Modules/Breakpad/discontiguous-sections.test b/lldb/lit/ObjectFile/Breakpad/discontiguous-sections.test similarity index 100% rename from lldb/lit/Modules/Breakpad/discontiguous-sections.test rename to lldb/lit/ObjectFile/Breakpad/discontiguous-sections.test diff --git a/lldb/lit/Modules/Breakpad/lit.local.cfg b/lldb/lit/ObjectFile/Breakpad/lit.local.cfg similarity index 100% rename from lldb/lit/Modules/Breakpad/lit.local.cfg rename to lldb/lit/ObjectFile/Breakpad/lit.local.cfg diff --git a/lldb/lit/Modules/Breakpad/sections-trailing-func.test b/lldb/lit/ObjectFile/Breakpad/sections-trailing-func.test similarity index 100% rename from lldb/lit/Modules/Breakpad/sections-trailing-func.test rename to lldb/lit/ObjectFile/Breakpad/sections-trailing-func.test diff --git a/lldb/lit/Modules/Breakpad/sections.test b/lldb/lit/ObjectFile/Breakpad/sections.test similarity index 100% rename from lldb/lit/Modules/Breakpad/sections.test rename to lldb/lit/ObjectFile/Breakpad/sections.test diff --git a/lldb/lit/Modules/Breakpad/uuid-matching-mac.test b/lldb/lit/ObjectFile/Breakpad/uuid-matching-mac.test similarity index 100% rename from lldb/lit/Modules/Breakpad/uuid-matching-mac.test rename to lldb/lit/ObjectFile/Breakpad/uuid-matching-mac.test diff --git a/lldb/lit/Modules/ELF/Inputs/PT_LOAD-overlap-section.elf b/lldb/lit/ObjectFile/ELF/Inputs/PT_LOAD-overlap-section.elf similarity index 100% rename from lldb/lit/Modules/ELF/Inputs/PT_LOAD-overlap-section.elf rename to 
lldb/lit/ObjectFile/ELF/Inputs/PT_LOAD-overlap-section.elf diff --git a/lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c b/lldb/lit/ObjectFile/ELF/Inputs/minidebuginfo-main.c similarity index 100% rename from lldb/lit/Modules/ELF/Inputs/minidebuginfo-main.c rename to lldb/lit/ObjectFile/ELF/Inputs/minidebuginfo-main.c diff --git a/lldb/lit/Modules/ELF/Inputs/netbsd-amd64.core b/lldb/lit/ObjectFile/ELF/Inputs/netbsd-amd64.core similarity index 100% rename from lldb/lit/Modules/ELF/Inputs/netbsd-amd64.core rename to lldb/lit/ObjectFile/ELF/Inputs/netbsd-amd64.core diff --git a/lldb/lit/Modules/ELF/PT_LOAD-empty.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD-empty.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD-empty.yaml rename to lldb/lit/ObjectFile/ELF/PT_LOAD-empty.yaml diff --git a/lldb/lit/Modules/ELF/PT_LOAD-overlap-PT_INTERP.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-PT_INTERP.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD-overlap-PT_INTERP.yaml rename to lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-PT_INTERP.yaml diff --git a/lldb/lit/Modules/ELF/PT_LOAD-overlap-PT_TLS.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-PT_TLS.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD-overlap-PT_TLS.yaml rename to lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-PT_TLS.yaml diff --git a/lldb/lit/Modules/ELF/PT_LOAD-overlap-section.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-section.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD-overlap-section.yaml rename to lldb/lit/ObjectFile/ELF/PT_LOAD-overlap-section.yaml diff --git a/lldb/lit/Modules/ELF/PT_LOAD-overlap.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD-overlap.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD-overlap.yaml rename to lldb/lit/ObjectFile/ELF/PT_LOAD-overlap.yaml diff --git a/lldb/lit/Modules/ELF/PT_LOAD.yaml b/lldb/lit/ObjectFile/ELF/PT_LOAD.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_LOAD.yaml rename to 
lldb/lit/ObjectFile/ELF/PT_LOAD.yaml diff --git a/lldb/lit/Modules/ELF/PT_TLS-overlap-PT_LOAD.yaml b/lldb/lit/ObjectFile/ELF/PT_TLS-overlap-PT_LOAD.yaml similarity index 100% rename from lldb/lit/Modules/ELF/PT_TLS-overlap-PT_LOAD.yaml rename to lldb/lit/ObjectFile/ELF/PT_TLS-overlap-PT_LOAD.yaml diff --git a/lldb/lit/Modules/ELF/aarch64-relocations.yaml b/lldb/lit/ObjectFile/ELF/aarch64-relocations.yaml similarity index 100% rename from lldb/lit/Modules/ELF/aarch64-relocations.yaml rename to lldb/lit/ObjectFile/ELF/aarch64-relocations.yaml diff --git a/lldb/lit/Modules/ELF/base-address.yaml b/lldb/lit/ObjectFile/ELF/base-address.yaml similarity index 100% rename from lldb/lit/Modules/ELF/base-address.yaml rename to lldb/lit/ObjectFile/ELF/base-address.yaml diff --git a/lldb/lit/Modules/ELF/basic-info.yaml b/lldb/lit/ObjectFile/ELF/basic-info.yaml similarity index 100% rename from lldb/lit/Modules/ELF/basic-info.yaml rename to lldb/lit/ObjectFile/ELF/basic-info.yaml diff --git a/lldb/lit/Modules/ELF/build-id-case.yaml b/lldb/lit/ObjectFile/ELF/build-id-case.yaml similarity index 100% rename from lldb/lit/Modules/ELF/build-id-case.yaml rename to lldb/lit/ObjectFile/ELF/build-id-case.yaml diff --git a/lldb/lit/Modules/ELF/compressed-sections.yaml b/lldb/lit/ObjectFile/ELF/compressed-sections.yaml similarity index 100% rename from lldb/lit/Modules/ELF/compressed-sections.yaml rename to lldb/lit/ObjectFile/ELF/compressed-sections.yaml diff --git a/lldb/lit/Modules/ELF/duplicate-section.yaml b/lldb/lit/ObjectFile/ELF/duplicate-section.yaml similarity index 100% rename from lldb/lit/Modules/ELF/duplicate-section.yaml rename to lldb/lit/ObjectFile/ELF/duplicate-section.yaml diff --git a/lldb/lit/Modules/ELF/gnu-debuglink.yaml b/lldb/lit/ObjectFile/ELF/gnu-debuglink.yaml similarity index 100% rename from lldb/lit/Modules/ELF/gnu-debuglink.yaml rename to lldb/lit/ObjectFile/ELF/gnu-debuglink.yaml diff --git a/lldb/lit/Modules/ELF/many-sections.s 
b/lldb/lit/ObjectFile/ELF/many-sections.s similarity index 100% rename from lldb/lit/Modules/ELF/many-sections.s rename to lldb/lit/ObjectFile/ELF/many-sections.s diff --git a/lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml b/lldb/lit/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml similarity index 100% rename from lldb/lit/Modules/ELF/minidebuginfo-corrupt-xz.yaml rename to lldb/lit/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml diff --git a/lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml b/lldb/lit/ObjectFile/ELF/minidebuginfo-find-symbols.yaml similarity index 100% rename from lldb/lit/Modules/ELF/minidebuginfo-find-symbols.yaml rename to lldb/lit/ObjectFile/ELF/minidebuginfo-find-symbols.yaml diff --git a/lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml b/lldb/lit/ObjectFile/ELF/minidebuginfo-no-lzma.yaml similarity index 100% rename from lldb/lit/Modules/ELF/minidebuginfo-no-lzma.yaml rename to lldb/lit/ObjectFile/ELF/minidebuginfo-no-lzma.yaml diff --git a/lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test b/lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test similarity index 100% rename from lldb/lit/Modules/ELF/minidebuginfo-set-and-hit-breakpoint.test rename to lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test diff --git a/lldb/lit/Modules/ELF/netbsd-core-amd64.test b/lldb/lit/ObjectFile/ELF/netbsd-core-amd64.test similarity index 100% rename from lldb/lit/Modules/ELF/netbsd-core-amd64.test rename to lldb/lit/ObjectFile/ELF/netbsd-core-amd64.test diff --git a/lldb/lit/Modules/ELF/netbsd-exec-8.99.30-amd64.yaml b/lldb/lit/ObjectFile/ELF/netbsd-exec-8.99.30-amd64.yaml similarity index 100% rename from lldb/lit/Modules/ELF/netbsd-exec-8.99.30-amd64.yaml rename to lldb/lit/ObjectFile/ELF/netbsd-exec-8.99.30-amd64.yaml diff --git a/lldb/lit/Modules/ELF/section-addresses.yaml b/lldb/lit/ObjectFile/ELF/section-addresses.yaml similarity index 100% rename from lldb/lit/Modules/ELF/section-addresses.yaml rename to 
lldb/lit/ObjectFile/ELF/section-addresses.yaml diff --git a/lldb/lit/Modules/ELF/section-overlap.yaml b/lldb/lit/ObjectFile/ELF/section-overlap.yaml similarity index 100% rename from lldb/lit/Modules/ELF/section-overlap.yaml rename to lldb/lit/ObjectFile/ELF/section-overlap.yaml diff --git a/lldb/lit/Modules/ELF/section-permissions.yaml b/lldb/lit/ObjectFile/ELF/section-permissions.yaml similarity index 100% rename from lldb/lit/Modules/ELF/section-permissions.yaml rename to lldb/lit/ObjectFile/ELF/section-permissions.yaml diff --git a/lldb/lit/Modules/ELF/section-types-edgecases.yaml b/lldb/lit/ObjectFile/ELF/section-types-edgecases.yaml similarity index 100% rename from lldb/lit/Modules/ELF/section-types-edgecases.yaml rename to lldb/lit/ObjectFile/ELF/section-types-edgecases.yaml diff --git a/lldb/lit/Modules/ELF/section-types.yaml b/lldb/lit/ObjectFile/ELF/section-types.yaml similarity index 100% rename from lldb/lit/Modules/ELF/section-types.yaml rename to lldb/lit/ObjectFile/ELF/section-types.yaml diff --git a/lldb/lit/Modules/ELF/short-build-id.yaml b/lldb/lit/ObjectFile/ELF/short-build-id.yaml similarity index 100% rename from lldb/lit/Modules/ELF/short-build-id.yaml rename to lldb/lit/ObjectFile/ELF/short-build-id.yaml diff --git a/lldb/lit/Modules/MachO/lc_build_version.yaml b/lldb/lit/ObjectFile/MachO/lc_build_version.yaml similarity index 100% rename from lldb/lit/Modules/MachO/lc_build_version.yaml rename to lldb/lit/ObjectFile/MachO/lc_build_version.yaml diff --git a/lldb/lit/Modules/MachO/lc_build_version_notools.yaml b/lldb/lit/ObjectFile/MachO/lc_build_version_notools.yaml similarity index 100% rename from lldb/lit/Modules/MachO/lc_build_version_notools.yaml rename to lldb/lit/ObjectFile/MachO/lc_build_version_notools.yaml diff --git a/lldb/lit/Modules/MachO/lc_version_min.yaml b/lldb/lit/ObjectFile/MachO/lc_version_min.yaml similarity index 100% rename from lldb/lit/Modules/MachO/lc_version_min.yaml rename to 
lldb/lit/ObjectFile/MachO/lc_version_min.yaml diff --git a/lldb/lit/Modules/MachO/subsections.yaml b/lldb/lit/ObjectFile/MachO/subsections.yaml similarity index 100% rename from lldb/lit/Modules/MachO/subsections.yaml rename to lldb/lit/ObjectFile/MachO/subsections.yaml diff --git a/lldb/lit/Modules/PECOFF/basic-info-arm.yaml b/lldb/lit/ObjectFile/PECOFF/basic-info-arm.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/basic-info-arm.yaml rename to lldb/lit/ObjectFile/PECOFF/basic-info-arm.yaml diff --git a/lldb/lit/Modules/PECOFF/basic-info-arm64.yaml b/lldb/lit/ObjectFile/PECOFF/basic-info-arm64.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/basic-info-arm64.yaml rename to lldb/lit/ObjectFile/PECOFF/basic-info-arm64.yaml diff --git a/lldb/lit/Modules/PECOFF/basic-info.yaml b/lldb/lit/ObjectFile/PECOFF/basic-info.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/basic-info.yaml rename to lldb/lit/ObjectFile/PECOFF/basic-info.yaml diff --git a/lldb/lit/Modules/PECOFF/dep-modules.yaml b/lldb/lit/ObjectFile/PECOFF/dep-modules.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/dep-modules.yaml rename to lldb/lit/ObjectFile/PECOFF/dep-modules.yaml diff --git a/lldb/lit/Modules/PECOFF/export-dllfunc.yaml b/lldb/lit/ObjectFile/PECOFF/export-dllfunc.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/export-dllfunc.yaml rename to lldb/lit/ObjectFile/PECOFF/export-dllfunc.yaml diff --git a/lldb/lit/Modules/PECOFF/lit.local.cfg b/lldb/lit/ObjectFile/PECOFF/lit.local.cfg similarity index 100% rename from lldb/lit/Modules/PECOFF/lit.local.cfg rename to lldb/lit/ObjectFile/PECOFF/lit.local.cfg diff --git a/lldb/lit/Modules/PECOFF/sections-names.yaml b/lldb/lit/ObjectFile/PECOFF/sections-names.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/sections-names.yaml rename to lldb/lit/ObjectFile/PECOFF/sections-names.yaml diff --git a/lldb/lit/Modules/PECOFF/subsections.yaml 
b/lldb/lit/ObjectFile/PECOFF/subsections.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/subsections.yaml rename to lldb/lit/ObjectFile/PECOFF/subsections.yaml diff --git a/lldb/lit/Modules/PECOFF/uuid.yaml b/lldb/lit/ObjectFile/PECOFF/uuid.yaml similarity index 100% rename from lldb/lit/Modules/PECOFF/uuid.yaml rename to lldb/lit/ObjectFile/PECOFF/uuid.yaml diff --git a/lldb/lit/Modules/lit.local.cfg b/lldb/lit/ObjectFile/lit.local.cfg similarity index 100% rename from lldb/lit/Modules/lit.local.cfg rename to lldb/lit/ObjectFile/lit.local.cfg From 61446a14219a61940949565d1f270ecd4c185526 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 7 Oct 2019 20:33:20 +0000 Subject: [PATCH 157/254] [AccelTable] Remove stale comment (NFC) rdar://55857228 llvm-svn: 373956 --- llvm/include/llvm/CodeGen/AccelTable.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h index 734531a65d505..f8f6b5448f3f1 100644 --- a/llvm/include/llvm/CodeGen/AccelTable.h +++ b/llvm/include/llvm/CodeGen/AccelTable.h @@ -101,8 +101,6 @@ /// /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable /// function. -/// -/// TODO: Add DWARF v5 emission code. namespace llvm { From 2b371fbeddc12617c51cd37dffb03e3cfeed9917 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 7 Oct 2019 20:35:22 +0000 Subject: [PATCH 158/254] [debugserver] Include the correct header. 
llvm-svn: 373957 --- lldb/tools/debugserver/source/MacOSX/MachVMMemory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMMemory.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMMemory.cpp index 74ddae67958f3..2b039c7b16cb1 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachVMMemory.cpp +++ b/lldb/tools/debugserver/source/MacOSX/MachVMMemory.cpp @@ -20,7 +20,7 @@ #if defined(WITH_FBS) || defined(WITH_BKS) extern "C" { -#import +#import } #endif From 46d317fad4627c92a0289e886695a047ed886e27 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 7 Oct 2019 20:41:25 +0000 Subject: [PATCH 159/254] [Bitcode] Update naming of UNOP_NEG to UNOP_FNEG Differential Revision: https://reviews.llvm.org/D68588 llvm-svn: 373958 --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 2 +- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index decd4dd3a9659..1a397068caf0a 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -391,7 +391,7 @@ enum CastOpcodes { /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum UnaryOpcodes { - UNOP_NEG = 0 + UNOP_FNEG = 0 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 9c377552b4a36..bd1115f238b36 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1063,7 +1063,7 @@ static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) { switch (Val) { default: return -1; - case bitc::UNOP_NEG: + case bitc::UNOP_FNEG: return IsFP ? 
Instruction::FNeg : -1; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 2818e843b50b6..deb4019ea8ba7 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -520,7 +520,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) { static unsigned getEncodedUnaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); - case Instruction::FNeg: return bitc::UNOP_NEG; + case Instruction::FNeg: return bitc::UNOP_FNEG; } } From c3b394ffba583a53e5c1cc45360f18a3e5c023fd Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Oct 2019 20:52:52 +0000 Subject: [PATCH 160/254] [InstCombine] dropRedundantMaskingOfLeftShiftInput(): propagate undef shift amounts Summary: When we do `ConstantExpr::getZExt()`, that "extends" `undef` to `0`, which means that for patterns a/b we'd assume that we must not produce any bits for that channel, while in reality we simply didn't care about that channel - i.e. we don't need to mask it. 
Reviewers: spatel Reviewed By: spatel Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68239 llvm-svn: 373960 --- .../InstCombine/InstCombineShifts.cpp | 33 +++++++++++++++++++ ...dant-left-shift-input-masking-variant-a.ll | 2 +- ...dant-left-shift-input-masking-variant-b.ll | 2 +- ...dant-left-shift-input-masking-variant-c.ll | 2 +- ...dant-left-shift-input-masking-variant-d.ll | 2 +- ...dant-left-shift-input-masking-variant-e.ll | 2 +- 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8ab4aeb38beaa..6675ab12aeecc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -117,6 +117,24 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, return Ret; } +// Try to replace `undef` constants in C with Replacement. +static Constant *replaceUndefsWith(Constant *C, Constant *Replacement) { + if (C && match(C, m_Undef())) + return Replacement; + + if (auto *CV = dyn_cast(C)) { + llvm::SmallVector NewOps(CV->getNumOperands()); + for (unsigned i = 0, NumElts = NewOps.size(); i != NumElts; ++i) { + Constant *EltC = CV->getOperand(i); + NewOps[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC; + } + return ConstantVector::get(NewOps); + } + + // Don't know how to deal with this constant. + return C; +} + // If we have some pattern that leaves only some low bits set, and then performs // left-shift of those bits, if none of the bits that are left after the final // shift are modified by the mask, we can omit the mask. @@ -177,6 +195,14 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // The mask must be computed in a type twice as wide to ensure // that no bits are lost if the sum-of-shifts is wider than the base type. 
Type *ExtendedTy = Ty->getExtendedType(); + // An extend of an undef value becomes zero because the high bits are + // never completely unknown. Replace the the `undef` shift amounts with + // final shift bitwidth to ensure that the value remains undef when + // creating the subsequent shift op. + SumOfShAmts = replaceUndefsWith( + SumOfShAmts, + ConstantInt::get(SumOfShAmts->getType()->getScalarType(), + ExtendedTy->getScalarType()->getScalarSizeInBits())); auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); // And compute the mask as usual: ~(-1 << (SumOfShAmts)) @@ -212,6 +238,13 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // The mask must be computed in a type twice as wide to ensure // that no bits are lost if the sum-of-shifts is wider than the base type. Type *ExtendedTy = Ty->getExtendedType(); + // An extend of an undef value becomes zero because the high bits are + // never completely unknown. Replace the the `undef` shift amounts with + // negated shift bitwidth to ensure that the value remains undef when + // creating the subsequent shift op. 
+ ShAmtsDiff = replaceUndefsWith( + ShAmtsDiff, + ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), -BitWidth)); auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( ConstantExpr::getAdd( ConstantExpr::getNeg(ShAmtsDiff), diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index bcaf6440efc7a..205dcfbf22cbf 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index faf069cfec9d5..4d3d5432375c7 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git 
a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index 2c6bb31ebf772..fc964f8725b5e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = lshr <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index fcbb7eb50d5d2..83c0df1d83e44 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -72,7 +72,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 2b3fb5098aacf..200d414d82587 100644 --- 
a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits From cb6d851bb65dbba3e4fc6a82226e9b308b11f89c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Oct 2019 20:53:00 +0000 Subject: [PATCH 161/254] [InstCombine][NFC] dropRedundantMaskingOfLeftShiftInput(): change how we deal with mask Summary: Currently, we pre-check whether we need to produce a mask or not. This involves some rather magical constants. I'd like to extend this fold to also handle the situation when there's also a `trunc` before outer shift. That will require another set of magical constants. It's ugly. Instead, we can just compute the mask, and check whether mask is a pass-through (all-ones) or not. This way we don't need to have any magical numbers. This change is NFC other than the fact that we now compute the mask and then check if we need (and can!) apply it. 
Reviewers: spatel Reviewed By: spatel Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68470 llvm-svn: 373961 --- .../InstCombine/InstCombineShifts.cpp | 132 ++++++++---------- 1 file changed, 62 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6675ab12aeecc..a325b29afa63d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -181,39 +181,29 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); if (!SumOfShAmts) return nullptr; // Did not simplify. + // In this pattern SumOfShAmts correlates with the number of low bits + // that shall remain in the root value (OuterShift). + Type *Ty = X->getType(); - unsigned BitWidth = Ty->getScalarSizeInBits(); - // In this pattern SumOfShAmts correlates with the number of low bits that - // shall remain in the root value (OuterShift). If SumOfShAmts is less than - // bitwidth, we'll need to also produce a mask to keep SumOfShAmts low bits. - // So, does *any* channel need a mask? - if (!match(SumOfShAmts, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_UGE, - APInt(BitWidth, BitWidth)))) { - // But for a mask we need to get rid of old masking instruction. - if (!Masked->hasOneUse()) - return nullptr; // Else we can't perform the fold. - // The mask must be computed in a type twice as wide to ensure - // that no bits are lost if the sum-of-shifts is wider than the base type. - Type *ExtendedTy = Ty->getExtendedType(); - // An extend of an undef value becomes zero because the high bits are - // never completely unknown. Replace the the `undef` shift amounts with - // final shift bitwidth to ensure that the value remains undef when - // creating the subsequent shift op. 
- SumOfShAmts = replaceUndefsWith( - SumOfShAmts, - ConstantInt::get(SumOfShAmts->getType()->getScalarType(), - ExtendedTy->getScalarType()->getScalarSizeInBits())); - auto *ExtendedSumOfShAmts = - ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); - // And compute the mask as usual: ~(-1 << (SumOfShAmts)) - auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); - auto *ExtendedInvertedMask = - ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts); - auto *ExtendedMask = ConstantExpr::getNot(ExtendedInvertedMask); - NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty); - } else - NewMask = nullptr; // No mask needed. - // All good, we can do this fold. + + // The mask must be computed in a type twice as wide to ensure + // that no bits are lost if the sum-of-shifts is wider than the base type. + Type *ExtendedTy = Ty->getExtendedType(); + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with final + // shift bitwidth to ensure that the value remains undef when creating the + // subsequent shift op. 
+ SumOfShAmts = replaceUndefsWith( + SumOfShAmts, + ConstantInt::get(SumOfShAmts->getType()->getScalarType(), + ExtendedTy->getScalarType()->getScalarSizeInBits())); + auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); + // And compute the mask as usual: ~(-1 << (SumOfShAmts)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + auto *ExtendedInvertedMask = + ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts); + auto *ExtendedMask = ConstantExpr::getNot(ExtendedInvertedMask); + NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty); } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) || match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)), m_Deferred(MaskShAmt)))) { @@ -223,49 +213,51 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, if (!ShAmtsDiff) return nullptr; // Did not simplify. // In this pattern ShAmtsDiff correlates with the number of high bits that - // shall be unset in the root value (OuterShift). If ShAmtsDiff is negative, - // we'll need to also produce a mask to unset ShAmtsDiff high bits. - // So, does *any* channel need a mask? (is ShiftShAmt u>= MaskShAmt ?) - if (!match(ShAmtsDiff, m_NonNegative())) { - // This sub-fold (with mask) is invalid for 'ashr' "masking" instruction. - if (match(Masked, m_AShr(m_Value(), m_Value()))) - return nullptr; - // For a mask we need to get rid of old masking instruction. - if (!Masked->hasOneUse()) - return nullptr; // Else we can't perform the fold. - Type *Ty = X->getType(); - unsigned BitWidth = Ty->getScalarSizeInBits(); - // The mask must be computed in a type twice as wide to ensure - // that no bits are lost if the sum-of-shifts is wider than the base type. - Type *ExtendedTy = Ty->getExtendedType(); - // An extend of an undef value becomes zero because the high bits are - // never completely unknown. 
Replace the the `undef` shift amounts with - // negated shift bitwidth to ensure that the value remains undef when - // creating the subsequent shift op. - ShAmtsDiff = replaceUndefsWith( - ShAmtsDiff, - ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), -BitWidth)); - auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( - ConstantExpr::getAdd( - ConstantExpr::getNeg(ShAmtsDiff), - ConstantInt::get(Ty, BitWidth, /*isSigned=*/false)), - ExtendedTy); - // And compute the mask as usual: (-1 l>> (ShAmtsDiff)) - auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); - auto *ExtendedMask = - ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear); - NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty); - } else - NewMask = nullptr; // No mask needed. - // All good, we can do this fold. + // shall be unset in the root value (OuterShift). + + Type *Ty = X->getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + + // The mask must be computed in a type twice as wide to ensure + // that no bits are lost if the sum-of-shifts is wider than the base type. + Type *ExtendedTy = Ty->getExtendedType(); + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with negated + // shift bitwidth to ensure that the value remains undef when creating the + // subsequent shift op. 
+ ShAmtsDiff = replaceUndefsWith( + ShAmtsDiff, + ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), -BitWidth)); + auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( + ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(), BitWidth, + /*isSigned=*/false), + ShAmtsDiff), + ExtendedTy); + // And compute the mask as usual: (-1 l>> (NumHighBitsToClear)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + auto *ExtendedMask = + ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear); + NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty); } else return nullptr; // Don't know anything about this pattern. - // No 'NUW'/'NSW'! - // We no longer know that we won't shift-out non-0 bits. + // Does this mask has any unset bits? If not then we can just not apply it. + bool NeedMask = !match(NewMask, m_AllOnes()); + + // If we need to apply a mask, there are several more restrictions we have. + if (NeedMask) { + // The old masking instruction must go away. + if (!Masked->hasOneUse()) + return nullptr; + // The original "masking" instruction must not have been`ashr`. + if (match(Masked, m_AShr(m_Value(), m_Value()))) + return nullptr; + } + + // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits. auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X, ShiftShAmt); - if (!NewMask) + if (!NeedMask) return NewShift; Builder.Insert(NewShift); From 0c73be590e67af0bab088483f167c44538ff449d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Oct 2019 20:53:08 +0000 Subject: [PATCH 162/254] [InstCombine] Move isSignBitCheck(), handle rest of the predicates True, no test coverage is being added here. But those non-canonical predicates that are already handled here already have no test coverage as far as i can tell. I tried to add tests for them, but all the patterns already get handled elsewhere. 
llvm-svn: 373962 --- .../InstCombine/InstCombineCompares.cpp | 28 ------------- .../InstCombine/InstCombineInternal.h | 39 +++++++++++++++++++ 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index f07f64e3f02ea..ee51bc03312fb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -69,34 +69,6 @@ static bool hasBranchUse(ICmpInst &I) { return false; } -/// Given an exploded icmp instruction, return true if the comparison only -/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the -/// result of the comparison is true when the input value is signed. -static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, - bool &TrueIfSigned) { - switch (Pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS.isNullValue(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS.isMaxSignedValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS.isSignMask(); - default: - return false; - } -} - /// Returns true if the exploded icmp can be expressed as a signed comparison /// to zero and updates the predicate accordingly. /// The signedness of the comparison is preserved. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index dcdbee15fe56b..e04cd346b6fc5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -113,6 +113,45 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) { } } +/// Given an exploded icmp instruction, return true if the comparison only +/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the +/// result of the comparison is true when the input value is signed. +inline bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned) { + switch (Pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS.isNullValue(); + case ICmpInst::ICMP_SLE: // True if LHS s<= -1 + TrueIfSigned = true; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGE: // True if LHS s>= 0 + TrueIfSigned = false; + return RHS.isNullValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = true; + return RHS.isMaxSignedValue(); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULT: + // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = false; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULE: + // True if LHS u<= RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = false; + return RHS.isMaxSignedValue(); + default: + return false; + } +} + llvm::Optional> getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C); From 3da71714cbf0e3682b24adbd4ba0b500ff947331 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Oct 2019 20:53:16 +0000 Subject: [PATCH 163/254] 
[InstCombine][NFC] Tests for "conditional sign-extend of high-bit-extract" pattern (PR42389) https://bugs.llvm.org/show_bug.cgi?id=42389 llvm-svn: 373963 --- ...e-length-signext-after-high-bit-extract.ll | 1040 +++++++++++++++++ 1 file changed, 1040 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll diff --git a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll new file mode 100644 index 0000000000000..70877dd526f4a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll @@ -0,0 +1,1040 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -instcombine -S | FileCheck %s + +; If we extract (via lshr) some high bits, and then perform their sign-extension +; conditionally depending on whether the extracted value is negative or not +; (i.e. interpreting the highest extracted bit, which was the original signbit +; of the value from which we extracted as a signbit), then we should just +; perform extraction via `ashr`. + +; Base patterns. 
+ +declare void @use1(i1) +declare void @use16(i16) +declare void @use32(i32) +declare void @use64(i64) + +define i32 @t0_notrunc_add(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t0_notrunc_add( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @t1_notrunc_sub(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t1_notrunc_sub( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 
[[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %higher_bit_after_signbit = shl i32 1, %nbits + %magic = select i1 %should_signext, i32 %higher_bit_after_signbit, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %higher_bit_after_signbit) + call void @use32(i32 %magic) + + %signextended = sub i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @t2_trunc_add(i64 %data, i32 %nbits) { +; CHECK-LABEL: @t2_trunc_add( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; 
CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 ; one-use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + + %signextended = add i32 %magic, %high_bits_extracted + ret i32 %signextended +} + +define i32 @t3_trunc_sub(i64 %data, i32 %nbits) { +; CHECK-LABEL: @t3_trunc_sub( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) +; 
CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 + %should_signext = icmp slt i64 %data, 0 + %higher_bit_after_signbit = shl i32 1, %nbits + %magic = select i1 %should_signext, i32 %higher_bit_after_signbit, i32 0 ; one-use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %higher_bit_after_signbit) + + %signextended = sub i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +; Commutativity + +define i32 @t4_commutativity0(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t4_commutativity0( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, 
%nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} +define i32 @t5_commutativity1(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t5_commutativity1( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp sgt i32 [[DATA]], -1 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 0, i32 [[ALL_BITS_EXCEPT_LOW_NBITS]] +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp sgt i32 %data, -1 ; swapped + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 0, i32 %all_bits_except_low_nbits ; swapped + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} +define i32 @t6_commutativity2(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t6_commutativity2( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub 
i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %magic, %high_bits_extracted ; swapped + ret i32 %signextended +} + +; Extra uses + +define i32 @t7_trunc_extrause0(i64 %data, i32 %nbits) { +; CHECK-LABEL: @t7_trunc_extrause0( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 
[[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 ; has extra use + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 ; one-use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + + %signextended = add i32 %magic, %high_bits_extracted + ret i32 %signextended +} +define i32 @t8_trunc_extrause1(i64 %data, i32 %nbits) { +; CHECK-LABEL: @t8_trunc_extrause1( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 
+; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 ; one-use + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 ; has extra use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %magic, %high_bits_extracted + ret i32 %signextended +} +define i32 @n9_trunc_extrause2(i64 %data, i32 %nbits) { +; CHECK-LABEL: @n9_trunc_extrause2( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; 
CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 ; has extra use + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 ; has extra use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %magic, %high_bits_extracted + ret i32 %signextended +} + +define i32 @t10_preserve_exact(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t10_preserve_exact( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr exact i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call 
void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr exact i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @t11_different_zext_of_shamt(i32 %data, i8 %nbits) { +; CHECK-LABEL: @t11_different_zext_of_shamt( +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS:%.*]] to i16 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i16 32, [[NBITS_16BIT]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_32:%.*]] = zext i16 [[LOW_BITS_TO_SKIP]] to i32 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_32]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS]] to i32 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS_32BIT]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP_32]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: 
[[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_16bit = zext i8 %nbits to i16 + %low_bits_to_skip = sub i16 32, %nbits_16bit + %low_bits_to_skip_32 = zext i16 %low_bits_to_skip to i32 + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip_32 + %should_signext = icmp slt i32 %data, 0 + %nbits_32bit = zext i8 %nbits to i32 + %all_bits_except_low_nbits = shl i32 -1, %nbits_32bit + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %low_bits_to_skip) + call void @use32(i32 %low_bits_to_skip_32) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @t12_add_sext_of_magic(i32 %data, i8 %nbits) { +; CHECK-LABEL: @t12_add_sext_of_magic( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 -1, [[NBITS_16BIT]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i16 [[ALL_BITS_EXCEPT_LOW_NBITS]], i16 0 +; CHECK-NEXT: [[MAGIC_WIDE:%.*]] = sext i16 [[MAGIC]] to i32 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; 
CHECK-NEXT: call void @use16(i16 [[MAGIC]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, %nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 -1, %nbits_16bit + %magic = select i1 %should_signext, i16 %all_bits_except_low_nbits, i16 0 + %magic_wide = sext i16 %magic to i32 + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use16(i16 %magic) + call void @use32(i32 %magic_wide) + + %signextended = add i32 %high_bits_extracted, %magic_wide + ret i32 %signextended +} + +define i32 @t13_sub_zext_of_magic(i32 %data, i8 %nbits) { +; CHECK-LABEL: @t13_sub_zext_of_magic( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 1, [[NBITS_16BIT]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i16 [[ALL_BITS_EXCEPT_LOW_NBITS]], i16 0 +; CHECK-NEXT: [[MAGIC_WIDE:%.*]] = zext i16 [[MAGIC]] to i32 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 
[[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use16(i16 [[MAGIC]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, %nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 1, %nbits_16bit + %magic = select i1 %should_signext, i16 %all_bits_except_low_nbits, i16 0 + %magic_wide = zext i16 %magic to i32 + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use16(i16 %magic) + call void @use32(i32 %magic_wide) + + %signextended = sub i32 %high_bits_extracted, %magic_wide + ret i32 %signextended +} + +define i32 @t14_add_sext_of_shl(i32 %data, i8 %nbits) { +; CHECK-LABEL: @t14_add_sext_of_shl( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 -1, [[NBITS_16BIT]] +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE:%.*]] = sext i16 [[ALL_BITS_EXCEPT_LOW_NBITS]] to i32 +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void 
@use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, %nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 -1, %nbits_16bit + %all_bits_except_low_nbits_wide = sext i16 %all_bits_except_low_nbits to i32 + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits_wide, i32 0 + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use32(i32 %all_bits_except_low_nbits_wide) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @t15_sub_zext_of_shl(i32 %data, i8 %nbits) { +; CHECK-LABEL: @t15_sub_zext_of_shl( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 1, [[NBITS_16BIT]] +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE:%.*]] = zext i16 [[ALL_BITS_EXCEPT_LOW_NBITS]] to i32 +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 
[[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, %nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 1, %nbits_16bit + %all_bits_except_low_nbits_wide = zext i16 %all_bits_except_low_nbits to i32 + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits_wide, i32 0 + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use32(i32 %all_bits_except_low_nbits_wide) + call void @use32(i32 %magic) + + %signextended = sub i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +; Negative tests. 
+ +define i32 @n16(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n16( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 31, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 31, %nbits ; not 32 + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n17_add(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n17_add( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void 
@use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 1, %nbits ; not -1 + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n18(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n18( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 0, i32 [[ALL_BITS_EXCEPT_LOW_NBITS]] +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 
%should_signext, i32 0, i32 %all_bits_except_low_nbits ; wrong order + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n19(i32 %data1, i32 %data2, i32 %nbits) { +; CHECK-LABEL: @n19( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA1:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA2:%.*]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data1, %low_bits_to_skip ; not %data2 + %should_signext = icmp slt i32 %data2, 0 ; not %data1 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n20(i32 %data, i32 %nbits1, i32 %nbits2) { +; CHECK-LABEL: @n20( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS1:%.*]] +; 
CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS2:%.*]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits1 ; not %nbits2 + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits2 ; not %nbits1 + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n21(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n21( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp sgt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void 
@use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp sgt i32 %data, 0 ; this isn't a sign bit test + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n22(i64 %data, i32 %nbits) { +; CHECK-LABEL: @n22( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 63, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + 
%low_bits_to_skip = sub i32 63, %nbits ; not 64 + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 %low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %magic, %high_bits_extracted + ret i32 %signextended +} + +define i32 @n23(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n23( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = ashr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = ashr i32 %data, %low_bits_to_skip ; not `lshr` + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 
%low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n24(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n24( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %higher_bit_after_signbit = shl i32 1, %nbits + %magic = select i1 %should_signext, i32 %higher_bit_after_signbit, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %higher_bit_after_signbit) + call void @use32(i32 %magic) + + %signextended = sub i32 %magic, %high_bits_extracted ; wrong order; `sub` is not commutative + ret i32 %signextended +} + +define i32 @n25_sub(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n25_sub( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: 
[[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %higher_bit_after_signbit = shl i32 -1, %nbits ; not 1 + %magic = select i1 %should_signext, i32 %higher_bit_after_signbit, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %higher_bit_after_signbit) + call void @use32(i32 %magic) + + %signextended = sub i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n26(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n26( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 -1 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 
[[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 -1 ; not 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = add i32 %high_bits_extracted, %magic + ret i32 %signextended +} + +define i32 @n27_add_zext_of_magic(i32 %data, i8 %nbits) { +; CHECK-LABEL: @n27_add_zext_of_magic( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 -1, [[NBITS_16BIT]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i16 [[ALL_BITS_EXCEPT_LOW_NBITS]], i16 0 +; CHECK-NEXT: [[MAGIC_WIDE:%.*]] = zext i16 [[MAGIC]] to i32 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use16(i16 [[MAGIC]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, 
%nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 -1, %nbits_16bit + %magic = select i1 %should_signext, i16 %all_bits_except_low_nbits, i16 0 + %magic_wide = zext i16 %magic to i32 ; not sext + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use16(i16 %magic) + call void @use32(i32 %magic_wide) + + %signextended = add i32 %high_bits_extracted, %magic_wide + ret i32 %signextended +} + +define i32 @n28_sub_sext_of_magic(i32 %data, i8 %nbits) { +; CHECK-LABEL: @n28_sub_sext_of_magic( +; CHECK-NEXT: [[NBITS_32BIT:%.*]] = zext i8 [[NBITS:%.*]] to i32 +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub nsw i32 32, [[NBITS_32BIT]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[NBITS_16BIT:%.*]] = zext i8 [[NBITS]] to i16 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i16 1, [[NBITS_16BIT]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i16 [[ALL_BITS_EXCEPT_LOW_NBITS]], i16 0 +; CHECK-NEXT: [[MAGIC_WIDE:%.*]] = sext i16 [[MAGIC]] to i32 +; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use16(i16 [[NBITS_16BIT]]) +; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use16(i16 [[MAGIC]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: ret i32 
[[SIGNEXTENDED]] +; + %nbits_32bit = zext i8 %nbits to i32 + %low_bits_to_skip = sub i32 32, %nbits_32bit + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %nbits_16bit = zext i8 %nbits to i16 + %all_bits_except_low_nbits = shl i16 1, %nbits_16bit + %magic = select i1 %should_signext, i16 %all_bits_except_low_nbits, i16 0 + %magic_wide = sext i16 %magic to i32 ; not zext + + call void @use32(i32 %nbits_32bit) + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use16(i16 %nbits_16bit) + call void @use16(i16 %all_bits_except_low_nbits) + call void @use16(i16 %magic) + call void @use32(i32 %magic_wide) + + %signextended = sub i32 %high_bits_extracted, %magic_wide + ret i32 %signextended +} From 7cdeac43e57274fdac01f61bf2365a9efaffa5e8 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Oct 2019 20:53:27 +0000 Subject: [PATCH 164/254] [InstCombine] Fold conditional sign-extend of high-bit-extract into high-bit-extract-with-signext (PR42389) This can come up in Bit Stream abstractions. The pattern looks big/scary, but it can't be simplified any further. It only is so simple because a number of my preparatory folds had happened already (shift amount reassociation / shift amount reassociation in bit test, sign bit test detection). Highlights: * There are two main flavors: https://rise4fun.com/Alive/zWi The difference is add vs. sub, and left-shift of -1 vs. 1 * Since we only change the shift opcode, we can preserve the exact-ness: https://rise4fun.com/Alive/4u4 * There can be truncation after high-bit-extraction: https://rise4fun.com/Alive/slHc1 (the main pattern i'm after!) Which means that we need to ignore zext of shift amounts and of NBits. * The sign-extending magic can be extended itself (in add pattern via sext, in sub pattern via zext. not the other way around!) 
https://rise4fun.com/Alive/NhG (or those sext/zext can be sinked into `select`!) Which again means we should pay attention when matching NBits. * We can have both truncation of extraction and widening of magic: https://rise4fun.com/Alive/XTw In other words, i don't believe we need to have any checks on bitwidths of any of these constructs. This is worsened in general by the fact that we may have `sext` instead of `zext` for shift amounts, and we don't yet canonicalize to `zext`, although we should. I have not done anything about that here. Also, we really should have something to weed out `sub` like these, by folding them into `add` variant. https://bugs.llvm.org/show_bug.cgi?id=42389 llvm-svn: 373964 --- .../InstCombine/InstCombineAddSub.cpp | 110 ++++++++++++++++++ ...e-length-signext-after-high-bit-extract.ll | 38 +++--- 2 files changed, 129 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 40cc188fbb60c..5d306cd8eea9d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1097,6 +1097,106 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } +static Instruction * +canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I, InstCombiner::BuilderTy &Builder) { + assert((I.getOpcode() == Instruction::Add || + I.getOpcode() == Instruction::Sub) && + "Expecting add/sub instruction"); + + // We have a subtraction/addition between a (potentially truncated) *logical* + // right-shift of X and a "select". + Value *X, *Select; + Instruction *LowBitsToSkip, *Extract; + if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd( + m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)), + m_Instruction(Extract))), + m_Value(Select)))) + return nullptr; + + // `add` is commutative; but for `sub`, "select" *must* be on RHS. 
+ if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select) + return nullptr; + + Type *XTy = X->getType(); + bool HadTrunc = I.getType() != XTy; + + // If there was a truncation of extracted value, then we'll need to produce + // one extra instruction, so we need to ensure one instruction will go away. + if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + + // Extraction should extract high NBits bits, with shift amount calculated as: + // low bits to skip = shift bitwidth - high bits to extract + // The shift amount itself may be extended, and we need to look past zero-ext + // when matching NBits, that will matter for matching later. + Constant *C; + Value *NBits; + if (!match( + LowBitsToSkip, + m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) || + !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(C->getType()->getScalarSizeInBits(), + X->getType()->getScalarSizeInBits())))) + return nullptr; + + // Sign-extending value can be sign-extended itself if we `add` it, + // or zero-extended if we `sub`tract it. + auto SkipExtInMagic = [&I](Value *&V) { + if (I.getOpcode() == Instruction::Add) + match(V, m_SExtOrSelf(m_Value(V))); + else + match(V, m_ZExtOrSelf(m_Value(V))); + }; + + // Now, finally validate the sign-extending magic. + // `select` itself may be appropriately extended, look past that. + SkipExtInMagic(Select); + + ICmpInst::Predicate Pred; + const APInt *Thr; + Value *SignExtendingValue, *Zero; + bool ShouldSignext; + // It must be a select between two values we will later estabilish to be a + // sign-extending value and a zero constant. The condition guarding the + // sign-extension must be based on a sign bit of the same X we had in `lshr`. + if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)), + m_Value(SignExtendingValue), m_Value(Zero))) || + !isSignBitCheck(Pred, *Thr, ShouldSignext)) + return nullptr; + + // icmp-select pair is commutative. 
+ if (!ShouldSignext) + std::swap(SignExtendingValue, Zero); + + // If we should not perform sign-extension then we must add/subtract zero. + if (!match(Zero, m_Zero())) + return nullptr; + // Otherwise, it should be some constant, left-shifted by the same NBits we + // had in `lshr`. Said left-shift can also be appropriately extended. + // Again, we must look past zero-ext when looking for NBits. + SkipExtInMagic(SignExtendingValue); + Constant *SignExtendingValueBaseConstant; + if (!match(SignExtendingValue, + m_Shl(m_Constant(SignExtendingValueBaseConstant), + m_ZExtOrSelf(m_Specific(NBits))))) + return nullptr; + // If we `add`, then the constant should be all-ones, else it should be one. + if (I.getOpcode() == Instruction::Add + ? !match(SignExtendingValueBaseConstant, m_AllOnes()) + : !match(SignExtendingValueBaseConstant, m_One())) + return nullptr; + + auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip, + Extract->getName() + ".sext"); + NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness. 
+ if (!HadTrunc) + return NewAShr; + + Builder.Insert(NewAShr); + return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType()); +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1302,6 +1402,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *V = canonicalizeLowbitMask(I, Builder)) return V; + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + I, Builder)) + return V; + if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) return SatAdd; @@ -1900,6 +2005,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return SelectInst::Create(Cmp, Neg, A); } + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + I, Builder)) + return V; + if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; diff --git a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll index 70877dd526f4a..027755641e87e 100644 --- a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll +++ b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll @@ -26,7 +26,7 @@ define i32 @t0_notrunc_add(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -57,7 +57,7 @@ define i32 @t1_notrunc_sub(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: 
call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -84,14 +84,14 @@ define i32 @t2_trunc_add(i64 %data, i32 %nbits) { ; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 ; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] -; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 ; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) ; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) ; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 64, %nbits @@ -121,14 +121,14 @@ define i32 @t3_trunc_sub(i64 %data, i32 %nbits) { ; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 ; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 1, [[NBITS]] -; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0 ; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) ; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) ; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) ; CHECK-NEXT: call 
void @use32(i32 [[HIGH_BITS_EXTRACTED]]) ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 64, %nbits @@ -164,7 +164,7 @@ define i32 @t4_commutativity0(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -194,7 +194,7 @@ define i32 @t5_commutativity1(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -224,7 +224,7 @@ define i32 @t6_commutativity2(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -253,14 +253,14 @@ define i32 @t7_trunc_extrause0(i64 %data, i32 %nbits) { ; CHECK-NEXT: 
[[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 ; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] -; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 ; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) ; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) ; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 64, %nbits @@ -286,7 +286,6 @@ define i32 @t8_trunc_extrause1(i64 %data, i32 %nbits) { ; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] ; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 ; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] -; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 ; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] ; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 @@ -296,7 +295,8 @@ define i32 @t8_trunc_extrause1(i64 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA]], 
[[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 64, %nbits @@ -368,7 +368,7 @@ define i32 @t10_preserve_exact(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr exact i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %low_bits_to_skip = sub i32 32, %nbits @@ -405,7 +405,7 @@ define i32 @t11_different_zext_of_shamt(i32 %data, i8 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[NBITS_32BIT]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP_32]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %nbits_16bit = zext i8 %nbits to i16 @@ -448,7 +448,7 @@ define i32 @t12_add_sext_of_magic(i32 %data, i8 %nbits) { ; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use16(i16 [[MAGIC]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %nbits_32bit = zext i8 %nbits to i32 @@ -491,7 +491,7 @@ define i32 @t13_sub_zext_of_magic(i32 %data, i8 %nbits) { ; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use16(i16 [[MAGIC]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC_WIDE]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr 
i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %nbits_32bit = zext i8 %nbits to i32 @@ -534,7 +534,7 @@ define i32 @t14_add_sext_of_shl(i32 %data, i8 %nbits) { ; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %nbits_32bit = zext i8 %nbits to i32 @@ -577,7 +577,7 @@ define i32 @t15_sub_zext_of_shl(i32 %data, i8 %nbits) { ; CHECK-NEXT: call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]]) ; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]]) ; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) -; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] ; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] ; %nbits_32bit = zext i8 %nbits to i32 From 1097fab1cf41e786a659b1fe45a1494170be6952 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 21:07:57 +0000 Subject: [PATCH 165/254] [Attributor] Deduce memory behavior of functions and arguments Deduce the memory behavior, aka "read-none", "read-only", or "write-only", for functions and arguments. 
Reviewers: sstefan1, uenoku Subscribers: hiraditya, bollu, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67384 llvm-svn: 373965 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 76 ++- llvm/lib/Transforms/IPO/Attributor.cpp | 483 +++++++++++++++++- llvm/test/Transforms/FunctionAttrs/align.ll | 12 +- .../Transforms/FunctionAttrs/arg_nocapture.ll | 20 +- .../Transforms/FunctionAttrs/arg_returned.ll | 54 +- .../FunctionAttrs/dereferenceable.ll | 12 +- .../FunctionAttrs/internal-noalias.ll | 6 +- .../test/Transforms/FunctionAttrs/liveness.ll | 14 +- .../FunctionAttrs/noalias_returned.ll | 2 +- .../Transforms/FunctionAttrs/nocapture.ll | 43 +- .../FunctionAttrs/nofree-attributor.ll | 14 +- llvm/test/Transforms/FunctionAttrs/nonnull.ll | 22 +- .../Transforms/FunctionAttrs/norecurse.ll | 16 +- llvm/test/Transforms/FunctionAttrs/nosync.ll | 28 +- .../read_write_returned_arguments_scc.ll | 2 +- .../Transforms/FunctionAttrs/readattrs.ll | 41 +- .../Transforms/FunctionAttrs/willreturn.ll | 28 +- 17 files changed, 710 insertions(+), 163 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index cf72d44867343..517afe85ec4c4 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -408,7 +408,11 @@ struct IRPosition { /// Return true if any kind in \p AKs existing in the IR at a position that /// will affect this one. See also getAttrs(...). - bool hasAttr(ArrayRef AKs) const; + /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, + /// e.g., the function position if this is an + /// argument position, should be ignored. + bool hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions = false) const; /// Return the attributes of any kind in \p AKs existing in the IR at a /// position that will affect this one. 
While each position can only have a @@ -434,6 +438,28 @@ struct IRPosition { return Attribute(); } + /// Remove the attribute of kind \p AKs existing in the IR at this position. + void removeAttrs(ArrayRef AKs) { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return; + + AttributeList AttrList; + CallSite CS = CallSite(&getAnchorValue()); + if (CS) + AttrList = CS.getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + LLVMContext &Ctx = getAnchorValue().getContext(); + for (Attribute::AttrKind AK : AKs) + AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); + + if (CS) + CS.setAttributes(AttrList); + else + getAssociatedFunction()->setAttributes(AttrList); + } + bool isAnyCallSitePosition() const { switch (getPositionKind()) { case IRPosition::IRP_CALL_SITE: @@ -1824,6 +1850,54 @@ struct AAHeapToStack : public StateWrapper, static const char ID; }; +/// An abstract interface for all memory related attributes. +struct AAMemoryBehavior + : public IRAttribute> { + AAMemoryBehavior(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// State encoding bits. A set bit in the state means the property holds. + /// BEST_STATE is the best possible state, 0 the worst possible state. + enum { + NO_READS = 1 << 0, + NO_WRITES = 1 << 1, + NO_ACCESSES = NO_READS | NO_WRITES, + + BEST_STATE = NO_ACCESSES, + }; + + /// Return true if we know that the underlying value is not read or accessed + /// in its respective scope. + bool isKnownReadNone() const { return isKnown(NO_ACCESSES); } + + /// Return true if we assume that the underlying value is not read or accessed + /// in its respective scope. + bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); } + + /// Return true if we know that the underlying value is not accessed + /// (=written) in its respective scope. 
+ bool isKnownReadOnly() const { return isKnown(NO_WRITES); } + + /// Return true if we assume that the underlying value is not accessed + /// (=written) in its respective scope. + bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); } + + /// Return true if we know that the underlying value is not read in its + /// respective scope. + bool isKnownWriteOnly() const { return isKnown(NO_READS); } + + /// Return true if we assume that the underlying value is not read in its + /// respective scope. + bool isAssumedWriteOnly() const { return isAssumed(NO_READS); } + + /// Create an abstract attribute view for the position \p IRP. + static AAMemoryBehavior &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 58ce91c807dd7..6b9888bcdae51 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -418,11 +418,18 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { } } -bool IRPosition::hasAttr(ArrayRef AKs) const { - for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) +bool IRPosition::hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { for (Attribute::AttrKind AK : AKs) if (EquivIRP.getAttr(AK).getKindAsEnum() == AK) return true; + // The first position returned by the SubsumingPositionIterator is + // always the position itself. If we ignore subsuming positions we + // are done after the first iteration. 
+ if (IgnoreSubsumingPositions) + break; + } return false; } @@ -3437,6 +3444,448 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl { }; } // namespace +/// -------------------- Memory Behavior Attributes ---------------------------- +/// Includes read-none, read-only, and write-only. +/// ---------------------------------------------------------------------------- +struct AAMemoryBehaviorImpl : public AAMemoryBehavior { + AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(getIRPosition(), getState()); + IRAttribute::initialize(A); + } + + /// Return the memory behavior information encoded in the IR for \p IRP. + static void getKnownStateFromValue(const IRPosition &IRP, + IntegerState &State) { + SmallVector Attrs; + IRP.getAttrs(AttrKinds, Attrs); + for (const Attribute &Attr : Attrs) { + switch (Attr.getKindAsEnum()) { + case Attribute::ReadNone: + State.addKnownBits(NO_ACCESSES); + break; + case Attribute::ReadOnly: + State.addKnownBits(NO_WRITES); + break; + case Attribute::WriteOnly: + State.addKnownBits(NO_READS); + break; + default: + llvm_unreachable("Unexpcted attribute!"); + } + } + + if (auto *I = dyn_cast(&IRP.getAnchorValue())) { + if (!I->mayReadFromMemory()) + State.addKnownBits(NO_READS); + if (!I->mayWriteToMemory()) + State.addKnownBits(NO_WRITES); + } + } + + /// See AbstractAttribute::getDeducedAttributes(...). 
+ void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + assert(Attrs.size() == 0); + if (isAssumedReadNone()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); + else if (isAssumedReadOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly)); + else if (isAssumedWriteOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly)); + assert(Attrs.size() <= 1); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + IRPosition &IRP = getIRPosition(); + + // Check if we would improve the existing attributes first. + SmallVector DeducedAttrs; + getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); + if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { + return IRP.hasAttr(Attr.getKindAsEnum(), + /* IgnoreSubsumingPositions */ true); + })) + return ChangeStatus::UNCHANGED; + + // Clear existing attributes. + IRP.removeAttrs(AttrKinds); + + // Use the generic manifest method. + return IRAttribute::manifest(A); + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isAssumedReadNone()) + return "readnone"; + if (isAssumedReadOnly()) + return "readonly"; + if (isAssumedWriteOnly()) + return "writeonly"; + return "may-read/write"; + } + + /// The set of IR attributes AAMemoryBehavior deals with. + static const Attribute::AttrKind AttrKinds[3]; +}; + +const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = { + Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly}; + +/// Memory behavior attribute for a floating value. +struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { + AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + // Initialize the use vector with all direct uses of the associated value. 
+ for (const Use &U : getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FLOATING_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FLOATING_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FLOATING_ATTR(writeonly) + } + +private: + /// Return true if users of \p UserI might access the underlying + /// variable/location described by \p U and should therefore be analyzed. + bool followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI); + + /// Update the state according to the effect of use \p U in \p UserI. + void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); + +protected: + /// Container for (transitive) uses of the associated argument. + SetVector Uses; +}; + +/// Memory behavior attribute for function argument. +struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { + AAMemoryBehaviorArgument(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorFloating::initialize(A); + + // TODO: From readattrs.ll: "inalloca parameters are always + // considered written" + if (hasAttr({Attribute::InAlloca})) + removeAssumedBits(NO_WRITES); + + // Initialize the use vector with all direct uses of the associated value. 
+ Argument *Arg = getAssociatedArgument(); + if (!Arg || !Arg->getParent()->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_ARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_ARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_ARG_ATTR(writeonly) + } +}; + +struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { + AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP) + : AAMemoryBehaviorArgument(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CSARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CSARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CSARG_ATTR(writeonly) + } +}; + +/// Memory behavior attribute for a call site return position. +struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { + AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // We do not annotate returned values. 
+ return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// An AA to represent the memory behavior function attributes. +struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { + AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + Function &F = cast(getAnchorValue()); + if (isAssumedReadNone()) { + F.removeFnAttr(Attribute::ArgMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + return AAMemoryBehaviorImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FN_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FN_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FN_ATTR(writeonly) + } +}; + +/// AAMemoryBehavior attribute for call sites. +struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { + AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || !F->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CS_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CS_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CS_ATTR(writeonly) + } +}; + +ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { + + // The current assumed state used to determine a change. + auto AssumedState = getAssumed(); + + auto CheckRWInst = [&](Instruction &I) { + // If the instruction has an own memory behavior state, use it to restrict + // the local state. No further analysis is required as the other memory + // state is as optimistic as it gets. + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &MemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_function(ICS)); + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return !isAtFixpoint(); + } + + // Remove access kind modifiers if necessary. + if (I.mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (I.mayWriteToMemory()) + removeAssumedBits(NO_WRITES); + return !isAtFixpoint(); + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) + return indicatePessimisticFixpoint(); + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { + + const IRPosition &IRP = getIRPosition(); + const IRPosition &FnPos = IRPosition::function_scope(IRP); + AAMemoryBehavior::StateType &S = getState(); + + // First, check the function scope. We take the known information and we avoid + // work if the assumed information implies the current assumed information for + // this attribute. 
+ const auto &FnMemAA = A.getAAFor(*this, FnPos); + S.addKnownBits(FnMemAA.getKnown()); + if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed()) + return ChangeStatus::UNCHANGED; + + // Make sure the value is not captured (except through "return"), if + // it is, any information derived would be irrelevant anyway as we cannot + // check the potential aliases introduced by the capture. + const auto &ArgNoCaptureAA = A.getAAFor(*this, IRP); + if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) + return indicatePessimisticFixpoint(); + + // The current assumed state used to determine a change. + auto AssumedState = S.getAssumed(); + + // Liveness information to exclude dead users. + // TODO: Take the FnPos once we have call site specific liveness information. + const auto &LivenessAA = A.getAAFor( + *this, IRPosition::function(*IRP.getAssociatedFunction())); + + // Visit and expand uses until all are analyzed or a fixpoint is reached. + for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) { + const Use *U = Uses[i]; + Instruction *UserI = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI + << " [Dead: " << (LivenessAA.isAssumedDead(UserI)) + << "]\n"); + if (LivenessAA.isAssumedDead(UserI)) + continue; + + // Check if the users of UserI should also be visited. + if (followUsersOfUseIn(A, U, UserI)) + for (const Use &UserIUse : UserI->uses()) + Uses.insert(&UserIUse); + + // If UserI might touch memory we analyze the use in detail. + if (UserI->mayReadOrWriteMemory()) + analyzeUseIn(A, U, UserI); + } + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + // The loaded value is unrelated to the pointer argument, no need to + // follow the users of the load. 
+ if (isa(UserI)) + return false; + + // By default we follow all uses assuming UserI might leak information on U, + // we have special handling for call sites operands though. + ImmutableCallSite ICS(UserI); + if (!ICS || !ICS.isArgOperand(U)) + return true; + + // If the use is a call argument known not to be captured, the users of + // the call do not need to be visited because they have to be unrelated to + // the input. Note that this check is not trivial even though we disallow + // general capturing of the underlying argument. The reason is that the + // call might the argument "through return", which we allow and for which we + // need to check call users. + unsigned ArgNo = ICS.getArgumentNo(U); + const auto &ArgNoCaptureAA = + A.getAAFor(*this, IRPosition::callsite_argument(ICS, ArgNo)); + return !ArgNoCaptureAA.isAssumedNoCapture(); +} + +void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + assert(UserI->mayReadOrWriteMemory()); + + switch (UserI->getOpcode()) { + default: + // TODO: Handle all atomics and other side-effect operations we know of. + break; + case Instruction::Load: + // Loads cause the NO_READS property to disappear. + removeAssumedBits(NO_READS); + return; + + case Instruction::Store: + // Stores cause the NO_WRITES property to disappear if the use is the + // pointer operand. Note that we do assume that capturing was taken care of + // somewhere else. + if (cast(UserI)->getPointerOperand() == U->get()) + removeAssumedBits(NO_WRITES); + return; + + case Instruction::Call: + case Instruction::CallBr: + case Instruction::Invoke: { + // For call sites we look at the argument memory behavior attribute (this + // could be recursive!) in order to restrict our own state. + ImmutableCallSite ICS(UserI); + + // Give up on operand bundles. 
+ if (ICS.isBundleOperand(U)) { + indicatePessimisticFixpoint(); + return; + } + + // Calling a function does read the function pointer, maybe write it if the + // function is self-modifying. + if (ICS.isCallee(U)) { + removeAssumedBits(NO_READS); + break; + } + + // Adjust the possible access behavior based on the information on the + // argument. + unsigned ArgNo = ICS.getArgumentNo(U); + const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo); + const auto &MemBehaviorAA = A.getAAFor(*this, ArgPos); + // "assumed" has at most the same bits as the MemBehaviorAA assumed + // and at least "known". + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return; + } + }; + + // Generally, look at the "may-properties" and adjust the assumed state if we + // did not trigger special handling before. + if (UserI->mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (UserI->mayWriteToMemory()) + removeAssumedBits(NO_WRITES); +} + /// ---------------------------------------------------------------------------- /// Attributor /// ---------------------------------------------------------------------------- @@ -3607,7 +4056,8 @@ bool Attributor::checkForAllInstructions( auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); - if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead, Opcodes)) + if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead, + Opcodes)) return false; // If we actually used liveness information so we have to record a dependence. @@ -3965,6 +4415,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be "no-recurse". getOrCreateAAFor(FPos); + // Every function might be "readnone/readonly/writeonly/...". + getOrCreateAAFor(FPos); + // Every function might be applicable for Heap-To-Stack conversion. 
if (EnableHeapToStack) getOrCreateAAFor(FPos); @@ -4019,6 +4472,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every argument with pointer type might be marked nocapture. getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked + // "readnone/readonly/writeonly/..." + getOrCreateAAFor(ArgPos); } } @@ -4232,6 +4689,7 @@ const char AAAlign::ID = 0; const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that // follow the naming scheme. @@ -4310,6 +4768,23 @@ const char AAHeapToStack::ID = 0; return *AA; \ } +#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + AA->initialize(A); \ + return *AA; \ + } + CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync) CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree) @@ -4329,6 +4804,8 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) +CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) + #undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION 
diff --git a/llvm/test/Transforms/FunctionAttrs/align.ll b/llvm/test/Transforms/FunctionAttrs/align.ll index fc5ffc61e8ead..52f94e7a16409 100644 --- a/llvm/test/Transforms/FunctionAttrs/align.ll +++ b/llvm/test/Transforms/FunctionAttrs/align.ll @@ -7,26 +7,26 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; TEST 1 -; ATTRIBUTOR: define align 8 i32* @test1(i32* returned align 8 "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define align 8 i32* @test1(i32* readnone returned align 8 "no-capture-maybe-returned" %0) define i32* @test1(i32* align 8 %0) #0 { ret i32* %0 } ; TEST 2 -; ATTRIBUTOR: define i32* @test2(i32* returned "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define i32* @test2(i32* readnone returned "no-capture-maybe-returned" %0) define i32* @test2(i32* %0) #0 { ret i32* %0 } ; TEST 3 -; ATTRIBUTOR: define align 4 i32* @test3(i32* align 8 "no-capture-maybe-returned" %0, i32* align 4 "no-capture-maybe-returned" %1, i1 %2) +; ATTRIBUTOR: define align 4 i32* @test3(i32* readnone align 8 "no-capture-maybe-returned" %0, i32* readnone align 4 "no-capture-maybe-returned" %1, i1 %2) define i32* @test3(i32* align 8 %0, i32* align 4 %1, i1 %2) #0 { %ret = select i1 %2, i32* %0, i32* %1 ret i32* %ret } ; TEST 4 -; ATTRIBUTOR: define align 32 i32* @test4(i32* align 32 "no-capture-maybe-returned" %0, i32* align 32 "no-capture-maybe-returned" %1, i1 %2) +; ATTRIBUTOR: define align 32 i32* @test4(i32* readnone align 32 "no-capture-maybe-returned" %0, i32* readnone align 32 "no-capture-maybe-returned" %1, i1 %2) define i32* @test4(i32* align 32 %0, i32* align 32 %1, i1 %2) #0 { %ret = select i1 %2, i32* %0, i32* %1 ret i32* %ret @@ -139,7 +139,7 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; TEST 7 ; Better than IR information -; ATTRIBUTOR: define align 32 i32* @test7(i32* returned align 32 "no-capture-maybe-returned" %p) +; ATTRIBUTOR: define align 32 i32* @test7(i32* readnone returned align 32 "no-capture-maybe-returned" %p) 
define align 4 i32* @test7(i32* align 32 %p) #0 { tail call i8* @f1(i8* align 8 dereferenceable(1) @a1) ret i32* %p @@ -162,7 +162,7 @@ define void @test8_helper() { } define internal void @test8(i32* %a, i32* %b, i32* %c) { -; ATTRIBUTOR: define internal void @test8(i32* nocapture align 4 %a, i32* nocapture align 4 %b, i32* nocapture %c) +; ATTRIBUTOR: define internal void @test8(i32* nocapture readnone align 4 %a, i32* nocapture readnone align 4 %b, i32* nocapture readnone %c) ret void } diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll index c963112947cc7..3b4b054b6e120 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll @@ -116,8 +116,7 @@ entry: ; ; CHECK: define dereferenceable_or_null(8) i64* @scc_B(double* readnone returned dereferenceable_or_null(8) "no-capture-maybe-returned" %a) ; -; FIXME: readnone missing for %s -; CHECK: define dereferenceable_or_null(2) i8* @scc_C(i16* returned dereferenceable_or_null(2) "no-capture-maybe-returned" %a) +; CHECK: define dereferenceable_or_null(2) i8* @scc_C(i16* readnone returned dereferenceable_or_null(2) "no-capture-maybe-returned" %a) ; ; float *scc_A(int *a) { ; return (float*)(a ? (int*)scc_A((int*)scc_B((double*)scc_C((short*)a))) : a); @@ -245,7 +244,7 @@ declare i32 @printf(i8* nocapture, ...) 
; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define i64* @not_captured_but_returned_0(i64* returned "no-capture-maybe-returned" %a) +; CHECK: define i64* @not_captured_but_returned_0(i64* returned writeonly "no-capture-maybe-returned" %a) define i64* @not_captured_but_returned_0(i64* %a) #0 { entry: store i64 0, i64* %a, align 8 @@ -260,7 +259,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define nonnull i64* @not_captured_but_returned_1(i64* "no-capture-maybe-returned" %a) +; CHECK: define nonnull i64* @not_captured_but_returned_1(i64* writeonly "no-capture-maybe-returned" %a) define i64* @not_captured_but_returned_1(i64* %a) #0 { entry: %add.ptr = getelementptr inbounds i64, i64* %a, i64 1 @@ -275,8 +274,7 @@ entry: ; not_captured_but_returned_1(a); ; } ; -; FIXME: no-capture missing for %a -; CHECK: define void @test_not_captured_but_returned_calls(i64* nocapture %a) +; CHECK: define void @test_not_captured_but_returned_calls(i64* nocapture writeonly %a) define void @test_not_captured_but_returned_calls(i64* %a) #0 { entry: %call = call i64* @not_captured_but_returned_0(i64* %a) @@ -291,7 +289,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define i64* @negative_test_not_captured_but_returned_call_0a(i64* returned "no-capture-maybe-returned" %a) +; CHECK: define i64* @negative_test_not_captured_but_returned_call_0a(i64* returned writeonly "no-capture-maybe-returned" %a) define i64* @negative_test_not_captured_but_returned_call_0a(i64* %a) #0 { entry: %call = call i64* @not_captured_but_returned_0(i64* %a) @@ -305,7 +303,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define void @negative_test_not_captured_but_returned_call_0b(i64* %a) +; CHECK: define void @negative_test_not_captured_but_returned_call_0b(i64* writeonly %a) define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 { entry: %call = call i64* 
@not_captured_but_returned_0(i64* %a) @@ -321,7 +319,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define nonnull i64* @negative_test_not_captured_but_returned_call_1a(i64* "no-capture-maybe-returned" %a) +; CHECK: define nonnull i64* @negative_test_not_captured_but_returned_call_1a(i64* writeonly "no-capture-maybe-returned" %a) define i64* @negative_test_not_captured_but_returned_call_1a(i64* %a) #0 { entry: %call = call i64* @not_captured_but_returned_1(i64* %a) @@ -335,7 +333,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; CHECK: define void @negative_test_not_captured_but_returned_call_1b(i64* %a) +; CHECK: define void @negative_test_not_captured_but_returned_call_1b(i64* writeonly %a) define void @negative_test_not_captured_but_returned_call_1b(i64* %a) #0 { entry: %call = call i64* @not_captured_but_returned_1(i64* %a) @@ -391,7 +389,7 @@ r: ; TEST not captured by readonly external function ; -; CHECK: define void @not_captured_by_readonly_call(i32* nocapture %b) +; CHECK: define void @not_captured_by_readonly_call(i32* nocapture readonly %b) declare i32* @readonly_unknown(i32*, i32*) readonly define void @not_captured_by_readonly_call(i32* %b) #0 { diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll index 57d2713d278be..b5c7596222aa5 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll @@ -159,23 +159,16 @@ return: ; preds = %cond.end, %if.then3 ; TEST SCC test returning a pointer value argument ; -; BOTH: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable -; BOTH-NEXT: define double* @ptr_sink_r0(double* readnone returned "no-capture-maybe-returned" %r) -; BOTH: Function Attrs: nofree noinline nosync nounwind readnone uwtable -; BOTH-NEXT: define double* @ptr_scc_r1(double* %a, double* readnone returned %r, double* nocapture readnone %b) -; 
BOTH: Function Attrs: nofree noinline nosync nounwind readnone uwtable -; BOTH-NEXT: define double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone returned %r) -; ; FNATTR: define double* @ptr_sink_r0(double* readnone returned %r) ; FNATTR: define double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b) ; FNATTR: define double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @ptr_sink_r0(double* returned "no-capture-maybe-returned" %r) -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @ptr_scc_r1(double* %a, double* returned %r, double* nocapture %b) -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @ptr_scc_r2(double* %a, double* %b, double* returned %r) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @ptr_sink_r0(double* readnone returned "no-capture-maybe-returned" %r) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @ptr_scc_r1(double* readnone %a, double* readnone returned %r, double* nocapture readnone %b) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone returned %r) ; ; double* ptr_scc_r1(double* a, double* b, double* r); ; double* ptr_scc_r2(double* a, double* b, double* r); @@ -293,7 +286,7 @@ entry: ; ; FNATTR: define i32* @rt2_helper(i32* %a) ; FNATTR: define i32* @rt2(i32* readnone %a, i32* readnone %b) -; BOTH: define i32* @rt2_helper(i32* returned %a) +; BOTH: define i32* @rt2_helper(i32* readnone returned %a) ; BOTH: define i32* @rt2(i32* readnone %a, i32* readnone "no-capture-maybe-returned" 
%b) define i32* @rt2_helper(i32* %a) #0 { entry: @@ -319,7 +312,7 @@ if.end: ; ; FNATTR: define i32* @rt3_helper(i32* %a, i32* %b) ; FNATTR: define i32* @rt3(i32* readnone %a, i32* readnone %b) -; BOTH: define i32* @rt3_helper(i32* %a, i32* returned "no-capture-maybe-returned" %b) +; BOTH: define i32* @rt3_helper(i32* readnone %a, i32* readnone returned "no-capture-maybe-returned" %b) ; BOTH: define i32* @rt3(i32* readnone %a, i32* readnone returned "no-capture-maybe-returned" %b) define i32* @rt3_helper(i32* %a, i32* %b) #0 { entry: @@ -355,7 +348,7 @@ if.end: ; BOTH: Function Attrs: noinline nounwind uwtable ; BOTH-NEXT: define i32* @calls_unknown_fn(i32* readnone returned "no-capture-maybe-returned" %r) ; FNATTR: define i32* @calls_unknown_fn(i32* readnone returned %r) -; ATTRIBUTOR: define i32* @calls_unknown_fn(i32* returned "no-capture-maybe-returned" %r) +; ATTRIBUTOR: define i32* @calls_unknown_fn(i32* readnone returned "no-capture-maybe-returned" %r) declare void @unknown_fn(i32* (i32*)*) #0 define i32* @calls_unknown_fn(i32* %r) #0 { @@ -443,7 +436,7 @@ entry: ; BOTH-NEXT: define double @select_and_phi(double returned %b) ; ; FNATTR: define double @select_and_phi(double %b) -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define double @select_and_phi(double returned %b) define double @select_and_phi(double %b) #0 { entry: @@ -475,7 +468,7 @@ if.end: ; preds = %if.then, %entry ; ; FNATTR: define double @recursion_select_and_phi(i32 %a, double %b) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define double @recursion_select_and_phi(i32 %a, double returned %b) define double @recursion_select_and_phi(i32 %a, double %b) #0 { entry: @@ -506,8 +499,8 @@ if.end: ; preds = %if.then, %entry ; ; FNATTR: define double* 
@bitcast(i32* readnone %b) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @bitcast(i32* returned "no-capture-maybe-returned" %b) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @bitcast(i32* readnone returned "no-capture-maybe-returned" %b) define double* @bitcast(i32* %b) #0 { entry: %bc0 = bitcast i32* %b to double* @@ -529,8 +522,8 @@ entry: ; ; FNATTR: define double* @bitcasts_select_and_phi(i32* readnone %b) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @bitcasts_select_and_phi(i32* returned %b) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @bitcasts_select_and_phi(i32* readnone returned %b) define double* @bitcasts_select_and_phi(i32* %b) #0 { entry: %bc0 = bitcast i32* %b to double* @@ -567,8 +560,8 @@ if.end: ; preds = %if.then, %entry ; ; FNATTR: define double* @ret_arg_arg_undef(i32* readnone %b) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @ret_arg_arg_undef(i32* returned %b) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @ret_arg_arg_undef(i32* readnone returned %b) define double* @ret_arg_arg_undef(i32* %b) #0 { entry: %bc0 = bitcast i32* %b to double* @@ -605,8 +598,8 @@ ret_undef: ; ; FNATTR: define double* @ret_undef_arg_arg(i32* readnone %b) ; -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define double* @ret_undef_arg_arg(i32* returned %b) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define double* @ret_undef_arg_arg(i32* readnone returned %b) define double* @ret_undef_arg_arg(i32* %b) #0 { entry: %bc0 = bitcast i32* %b to double* @@ -642,7 +635,7 @@ ret_arg1: ; 
BOTH-NEXT: define double* @ret_undef_arg_undef(i32* readnone returned %b) ; ; FNATTR: define double* @ret_undef_arg_undef(i32* readnone %b) -; ATTRIBUTOR: define double* @ret_undef_arg_undef(i32* returned %b) +; ATTRIBUTOR: define double* @ret_undef_arg_undef(i32* readnone returned %b) define double* @ret_undef_arg_undef(i32* %b) #0 { entry: %bc0 = bitcast i32* %b to double* @@ -846,7 +839,8 @@ attributes #0 = { noinline nounwind uwtable } ; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline noreturn nosync nounwind readonly uwtable } ; BOTH-DAG: attributes #{{[0-9]*}} = { noinline nounwind uwtable } ; BOTH-DAG: attributes #{{[0-9]*}} = { noreturn } -; BOTH-DAG: attributes #{{[0-9]*}} = { nofree nosync willreturn } -; BOTH-DAG: attributes #{{[0-9]*}} = { nofree nosync } -; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noreturn nosync } +; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline norecurse nosync nounwind readnone uwtable } +; BOTH-DAG: attributes #{{[0-9]*}} = { nofree nosync readnone willreturn } +; BOTH-DAG: attributes #{{[0-9]*}} = { nofree nosync readnone } +; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noreturn nosync readonly } ; BOTH-NOT: attributes # diff --git a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll index d7b576f506acb..1ff1e285dd745 100644 --- a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll @@ -7,7 +7,7 @@ declare void @deref_phi_user(i32* %a); ; take mininimum of return values ; define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test1(i32* nonnull dereferenceable(4) "no-capture-maybe-returned" %0, double* nonnull dereferenceable(8) "no-capture-maybe-returned" %1, i1 zeroext %2) +; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test1(i32* nonnull readnone dereferenceable(4) 
"no-capture-maybe-returned" %0, double* nonnull readnone dereferenceable(8) "no-capture-maybe-returned" %1, i1 zeroext %2) %4 = bitcast double* %1 to i32* %5 = select i1 %2, i32* %0, i32* %4 ret i32* %5 @@ -15,7 +15,7 @@ define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 ; TEST 2 define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; ATTRIBUTOR: define dereferenceable_or_null(4) i32* @test2(i32* dereferenceable_or_null(4) "no-capture-maybe-returned" %0, double* nonnull dereferenceable(8) "no-capture-maybe-returned" %1, i1 zeroext %2) +; ATTRIBUTOR: define dereferenceable_or_null(4) i32* @test2(i32* readnone dereferenceable_or_null(4) "no-capture-maybe-returned" %0, double* nonnull readnone dereferenceable(8) "no-capture-maybe-returned" %1, i1 zeroext %2) %4 = bitcast double* %1 to i32* %5 = select i1 %2, i32* %0, i32* %4 ret i32* %5 @@ -24,20 +24,20 @@ define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8 ; TEST 3 ; GEP inbounds define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { -; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test3_1(i32* nonnull dereferenceable(8) "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test3_1(i32* nonnull readnone dereferenceable(8) "no-capture-maybe-returned" %0) %ret = getelementptr inbounds i32, i32* %0, i64 1 ret i32* %ret } define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { ; FIXME: Argument should be mark dereferenceable because of GEP `inbounds`. 
-; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* dereferenceable_or_null(32) "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* readnone dereferenceable_or_null(32) "no-capture-maybe-returned" %0) %ret = getelementptr inbounds i32, i32* %0, i64 4 ret i32* %ret } define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 %2) local_unnamed_addr { -; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test3_3(i32* nonnull dereferenceable(8) "no-capture-maybe-returned" %0, i32* nonnull dereferenceable(16) "no-capture-maybe-returned" %1, i1 %2) local_unnamed_addr +; ATTRIBUTOR: define nonnull dereferenceable(4) i32* @test3_3(i32* nonnull readnone dereferenceable(8) "no-capture-maybe-returned" %0, i32* nonnull readnone dereferenceable(16) "no-capture-maybe-returned" %1, i1 %2) local_unnamed_addr %ret1 = getelementptr inbounds i32, i32* %0, i64 1 %ret2 = getelementptr inbounds i32, i32* %1, i64 2 %ret = select i1 %2, i32* %ret1, i32* %ret2 @@ -48,7 +48,7 @@ define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 ; Better than known in IR. define dereferenceable(4) i32* @test4(i32* dereferenceable(8) %0) local_unnamed_addr { -; ATTRIBUTOR: define nonnull dereferenceable(8) i32* @test4(i32* nonnull returned dereferenceable(8) "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define nonnull dereferenceable(8) i32* @test4(i32* nonnull readnone returned dereferenceable(8) "no-capture-maybe-returned" %0) ret i32* %0 } diff --git a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll b/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll index 1118fa194f3c5..cc207031015fc 100644 --- a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll +++ b/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll @@ -8,9 +8,7 @@ entry: ret i32 %add } -; FIXME: Should be something like this. 
-; define internal i32 @noalias_args(i32* nocapture readonly %A, i32* noalias nocapture readonly %B) -; CHECK: define internal i32 @noalias_args(i32* nocapture %A, i32* noalias nocapture %B) +; CHECK: define internal i32 @noalias_args(i32* nocapture readonly %A, i32* noalias nocapture readonly %B) define internal i32 @noalias_args(i32* %A, i32* %B) #0 { entry: @@ -25,7 +23,7 @@ entry: ; FIXME: Should be something like this. ; define internal i32 @noalias_args_argmem(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B) -; CHECK: define internal i32 @noalias_args_argmem(i32* nocapture %A, i32* nocapture %B) +; CHECK: define internal i32 @noalias_args_argmem(i32* nocapture readonly %A, i32* nocapture readonly %B) ; define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { entry: diff --git a/llvm/test/Transforms/FunctionAttrs/liveness.ll b/llvm/test/Transforms/FunctionAttrs/liveness.ll index 771c99309bb1a..080a6bfab84b1 100644 --- a/llvm/test/Transforms/FunctionAttrs/liveness.ll +++ b/llvm/test/Transforms/FunctionAttrs/liveness.ll @@ -39,8 +39,8 @@ define i32 @volatile_load(i32*) norecurse nounwind uwtable { ret i32 %2 } -; CHECK: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn -; CHECK-NEXT: define internal i32 @internal_load(i32* nocapture nonnull %0) +; CHECK: Function Attrs: nofree norecurse nosync nounwind readonly uwtable willreturn +; CHECK-NEXT: define internal i32 @internal_load(i32* nocapture nonnull readonly %0) define internal i32 @internal_load(i32*) norecurse nounwind uwtable { %2 = load i32, i32* %0, align 4 ret i32 %2 @@ -48,11 +48,11 @@ define internal i32 @internal_load(i32*) norecurse nounwind uwtable { ; TEST 1: Only first block is live. 
; CHECK: Function Attrs: nofree noreturn nosync nounwind -; CHECK-NEXT: define i32 @first_block_no_return(i32 %a, i32* nocapture nonnull %ptr1, i32* nocapture %ptr2) +; CHECK-NEXT: define i32 @first_block_no_return(i32 %a, i32* nocapture nonnull readonly %ptr1, i32* nocapture readnone %ptr2) define i32 @first_block_no_return(i32 %a, i32* nonnull %ptr1, i32* %ptr2) #0 { entry: call i32 @internal_load(i32* %ptr1) - ; CHECK: call i32 @internal_load(i32* nocapture nonnull %ptr1) + ; CHECK: call i32 @internal_load(i32* nocapture nonnull readonly %ptr1) call void @no_return_call() ; CHECK: call void @no_return_call() ; CHECK-NEXT: unreachable @@ -84,7 +84,7 @@ cond.end: ; preds = %cond.false, %cond.t ; dead block and check if it is deduced. ; CHECK: Function Attrs: nosync -; CHECK-NEXT: define i32 @dead_block_present(i32 %a, i32* nocapture %ptr1) +; CHECK-NEXT: define i32 @dead_block_present(i32 %a, i32* nocapture readnone %ptr1) define i32 @dead_block_present(i32 %a, i32* %ptr1) #0 { entry: %cmp = icmp eq i32 %a, 0 @@ -239,7 +239,7 @@ cleanup: ; TEST 6: Undefined behvior, taken from LangRef. ; FIXME: Should be able to detect undefined behavior. -; CHECK: define void @ub(i32* nocapture %0) +; CHECK: define void @ub(i32* nocapture writeonly %0) define void @ub(i32* %0) { %poison = sub nuw i32 0, 1 ; Results in a poison value. %still_poison = and i32 %poison, 0 ; 0, but also poison. @@ -660,7 +660,7 @@ define internal void @dead_e2() { ret void } ; CHECK: define internal void @non_dead_d13() ; CHECK: define internal void @non_dead_d14() ; Verify we actually deduce information for these functions. 
-; CHECK: Function Attrs: nofree nosync nounwind willreturn +; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn ; CHECK-NEXT: define internal void @non_dead_d15() ; CHECK-NOT: define internal void @dead_e diff --git a/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll b/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll index 7174c3dc97cb7..6aaf88ccf8dc0 100644 --- a/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll +++ b/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll @@ -153,7 +153,7 @@ define i8* @test8(i32* %0) nounwind uwtable { ; TEST 9 ; Simple Argument Test define internal void @test9(i8* %a, i8* %b) { -; CHECK: define internal void @test9(i8* noalias nocapture %a, i8* nocapture %b) +; CHECK: define internal void @test9(i8* noalias nocapture readnone %a, i8* nocapture readnone %b) ret void } define void @test9_helper(i8* %a, i8* %b) { diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 673df13cc2495..cef45832592ba 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -6,7 +6,7 @@ @g = global i32* null ; [#uses=1] ; FNATTR: define i32* @c1(i32* readnone returned %q) -; ATTRIBUTOR: define i32* @c1(i32* returned "no-capture-maybe-returned" %q) +; ATTRIBUTOR: define i32* @c1(i32* readnone returned "no-capture-maybe-returned" %q) define i32* @c1(i32* %q) { ret i32* %q } @@ -24,7 +24,8 @@ define void @c3(i32* %q) { ret void } -; EITHER: define i1 @c4(i32* %q, i32 %bitno) +; FNATTR: define i1 @c4(i32* %q, i32 %bitno) +; ATTRIBUTOR: define i1 @c4(i32* readnone %q, i32 %bitno) define i1 @c4(i32* %q, i32 %bitno) { %tmp = ptrtoint i32* %q to i32 %tmp2 = lshr i32 %tmp, %bitno @@ -126,8 +127,7 @@ define void @nc3(void ()* %p) { } declare void @external(i8*) readonly nounwind -; FNATTR: define void @nc4(i8* nocapture readonly %p) -; ATTRIBUTOR: define void @nc4(i8* nocapture %p) +; EITHER: define void 
@nc4(i8* nocapture readonly %p) define void @nc4(i8* %p) { call void @external(i8* %p) ret void @@ -141,7 +141,7 @@ define void @nc5(void (i8*)* %f, i8* %p) { } ; FNATTR: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1, i1 %c) -; ATTRIBUTOR: define void @test1_1(i8* nocapture %x1_1, i8* nocapture %y1_1, i1 %c) +; ATTRIBUTOR: define void @test1_1(i8* nocapture readnone %x1_1, i8* nocapture readnone %y1_1, i1 %c) ; It would be acceptable to add readnone to %y1_1 and %y1_2. define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { call i8* @test1_2(i8* %x1_1, i8* %y1_1, i1 %c) @@ -150,7 +150,7 @@ define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { } ; FNATTR: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* returned %y1_2, i1 %c) -; ATTRIBUTOR: define i8* @test1_2(i8* nocapture %x1_2, i8* returned "no-capture-maybe-returned" %y1_2, i1 %c) +; ATTRIBUTOR: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* readnone returned "no-capture-maybe-returned" %y1_2, i1 %c) define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { br i1 %c, label %t, label %f t: @@ -161,16 +161,14 @@ f: ret i8* %y1_2 } -; FNATTR: define void @test2(i8* nocapture readnone %x2) -; ATTRIBUTOR: define void @test2(i8* nocapture %x2) +; EITHER: define void @test2(i8* nocapture readnone %x2) define void @test2(i8* %x2) { call void @test2(i8* %x2) store i32* null, i32** @g ret void } -; FNATTR: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3) -; ATTRIBUTOR: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3) +; EITHER: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3) define void @test3(i8* %x3, i8* %y3, i8* %z3) { call void @test3(i8* %z3, i8* %y3, i8* %x3) store i32* null, i32** @g @@ -178,7 +176,7 @@ define void @test3(i8* %x3, i8* %y3, i8* %z3) { } ; FNATTR: define void @test4_1(i8* %x4_1, i1 %c) -; ATTRIBUTOR: define void @test4_1(i8* nocapture %x4_1, i1 %c) +; 
ATTRIBUTOR: define void @test4_1(i8* nocapture readnone %x4_1, i1 %c) define void @test4_1(i8* %x4_1, i1 %c) { call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1, i1 %c) store i32* null, i32** @g @@ -186,7 +184,7 @@ define void @test4_1(i8* %x4_1, i1 %c) { } ; FNATTR: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned %y4_2, i8* nocapture readnone %z4_2, i1 %c) -; ATTRIBUTOR: define i8* @test4_2(i8* nocapture %x4_2, i8* returned "no-capture-maybe-returned" %y4_2, i8* nocapture %z4_2, i1 %c) +; ATTRIBUTOR: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture readnone %z4_2, i1 %c) define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { br i1 %c, label %t, label %f t: @@ -257,7 +255,8 @@ define void @captureLaunder(i8* %p) { ret void } -; EITHER: @nocaptureStrip(i8* nocapture %p) +; FNATTR: @nocaptureStrip(i8* nocapture %p) +; ATTRIBUTOR: @nocaptureStrip(i8* nocapture writeonly %p) define void @nocaptureStrip(i8* %p) { entry: %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) @@ -273,22 +272,19 @@ define void @captureStrip(i8* %p) { ret void } -; FNATTR: define i1 @captureICmp(i32* readnone %x) -; ATTRIBUTOR: define i1 @captureICmp(i32* %x) +; EITHER: define i1 @captureICmp(i32* readnone %x) define i1 @captureICmp(i32* %x) { %1 = icmp eq i32* %x, null ret i1 %1 } -; FNATTR: define i1 @captureICmpRev(i32* readnone %x) -; ATTRIBUTOR: define i1 @captureICmpRev(i32* %x) +; EITHER: define i1 @captureICmpRev(i32* readnone %x) define i1 @captureICmpRev(i32* %x) { %1 = icmp eq i32* null, %x ret i1 %1 } -; FNATTR: define i1 @nocaptureInboundsGEPICmp(i32* nocapture readnone %x) -; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmp(i32* nocapture %x) +; EITHER: define i1 @nocaptureInboundsGEPICmp(i32* nocapture readnone %x) define i1 @nocaptureInboundsGEPICmp(i32* %x) { %1 = getelementptr inbounds i32, i32* %x, i32 5 %2 = bitcast i32* %1 to i8* @@ -296,8 +292,7 @@ define i1 
@nocaptureInboundsGEPICmp(i32* %x) { ret i1 %3 } -; FNATTR: define i1 @nocaptureInboundsGEPICmpRev(i32* nocapture readnone %x) -; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmpRev(i32* nocapture %x) +; EITHER: define i1 @nocaptureInboundsGEPICmpRev(i32* nocapture readnone %x) define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { %1 = getelementptr inbounds i32, i32* %x, i32 5 %2 = bitcast i32* %1 to i8* @@ -305,16 +300,14 @@ define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { ret i1 %3 } -; FNATTR: define i1 @nocaptureDereferenceableOrNullICmp(i32* nocapture readnone dereferenceable_or_null(4) %x) -; ATTRIBUTOR: define i1 @nocaptureDereferenceableOrNullICmp(i32* nocapture dereferenceable_or_null(4) %x) +; EITHER: define i1 @nocaptureDereferenceableOrNullICmp(i32* nocapture readnone dereferenceable_or_null(4) %x) define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) { %1 = bitcast i32* %x to i8* %2 = icmp eq i8* %1, null ret i1 %2 } -; FNATTR: define i1 @captureDereferenceableOrNullICmp(i32* readnone dereferenceable_or_null(4) %x) -; ATTRIBUTOR: define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) +; EITHER: define i1 @captureDereferenceableOrNullICmp(i32* readnone dereferenceable_or_null(4) %x) define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) "null-pointer-is-valid"="true" { %1 = bitcast i32* %x to i8* %2 = icmp eq i8* %1, null diff --git a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll index f76b429aa4b33..8015077f9031c 100644 --- a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll @@ -15,7 +15,7 @@ declare void @_ZdaPv(i8*) local_unnamed_addr #2 ; TEST 1 (positive case) ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable ; FNATTR-NEXT: define void @only_return() -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable 
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @only_return() define void @only_return() #0 { ret void @@ -92,7 +92,7 @@ end: ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NEXT: define void @mutual_recursion1() -; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @mutual_recursion1() define void @mutual_recursion1() #0 { call void @mutual_recursion2() @@ -101,7 +101,7 @@ define void @mutual_recursion1() #0 { ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NEXT: define void @mutual_recursion2() -; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @mutual_recursion2() define void @mutual_recursion2() #0 { call void @mutual_recursion1() @@ -158,7 +158,7 @@ declare void @nofree_function() nofree readnone #0 ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NEXT: define void @call_nofree_function() -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @call_nofree_function() define void @call_nofree_function() #0 { tail call void @nofree_function() @@ -211,7 +211,7 @@ declare float @llvm.floor.f32(float) ; FNATTRS: Function Attrs: noinline nounwind uwtable ; FNATTRS-NEXT: define void @call_floor(float %a) ; FIXME: missing nofree -; ATTRIBUTOR: Function Attrs: noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @call_floor(float %a) define void @call_floor(float %a) #0 { @@ -224,7 +224,7 @@ define void @call_floor(float %a) #0 { ; FNATTRS: 
Function Attrs: noinline nounwind uwtable ; FNATTRS-NEXT: define void @f1() -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @f1() define void @f1() #0 { tail call void @nofree_function() @@ -233,7 +233,7 @@ define void @f1() #0 { ; FNATTRS: Function Attrs: noinline nounwind uwtable ; FNATTRS-NEXT: define void @f2() -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @f2() define void @f2() #0 { tail call void @f1() diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 0afba39e06777..73517b7c1d694 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -159,7 +159,7 @@ define void @test13_helper() { ret void } define internal void @test13(i8* %a, i8* %b, i8* %c) { -; ATTRIBUTOR: define internal void @test13(i8* nocapture nonnull %a, i8* nocapture %b, i8* nocapture %c) +; ATTRIBUTOR: define internal void @test13(i8* nocapture nonnull readnone %a, i8* nocapture readnone %b, i8* nocapture readnone %c) ret void } @@ -178,8 +178,8 @@ declare nonnull i8* @nonnull() define internal i32* @f1(i32* %arg) { -; FIXME: missing nonnull It should be nonnull @f1(i32* nonnull %arg) -; ATTRIBUTOR: define internal nonnull i32* @f1(i32* %arg) +; FIXME: missing nonnull It should be nonnull @f1(i32* nonnull readonly %arg) +; ATTRIBUTOR: define internal nonnull i32* @f1(i32* readonly %arg) bb: %tmp = icmp eq i32* %arg, null @@ -212,18 +212,18 @@ define internal i32* @f2(i32* %arg) { ; ATTRIBUTOR: define internal nonnull i32* @f2(i32* %arg) bb: -; FIXME: missing nonnull. It should be @f1(i32* nonnull %arg) -; ATTRIBUTOR: %tmp = tail call nonnull i32* @f1(i32* %arg) +; FIXME: missing nonnull. 
It should be @f1(i32* nonnull readonly %arg) +; ATTRIBUTOR: %tmp = tail call nonnull i32* @f1(i32* readonly %arg) %tmp = tail call i32* @f1(i32* %arg) ret i32* %tmp } define dso_local noalias i32* @f3(i32* %arg) { -; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull %arg) -; ATTRIBUTOR: define dso_local noalias i32* @f3(i32* %arg) +; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull readonly %arg) +; ATTRIBUTOR: define dso_local noalias i32* @f3(i32* readonly %arg) bb: -; FIXME: missing nonnull. It should be @f1(i32* nonnull %arg) -; ATTRIBUTOR: %tmp = call i32* @f1(i32* %arg) +; FIXME: missing nonnull. It should be @f1(i32* nonnull readonly %arg) +; ATTRIBUTOR: %tmp = call i32* @f1(i32* readonly %arg) %tmp = call i32* @f1(i32* %arg) ret i32* null } @@ -402,7 +402,7 @@ declare i32 @esfp(...) define i1 @parent8(i8* %a, i8* %bogus1, i8* %b) personality i8* bitcast (i32 (...)* @esfp to i8*){ ; FNATTR-LABEL: @parent8(i8* nonnull %a, i8* nocapture readnone %bogus1, i8* nonnull %b) ; FIXME : missing "nonnull", it should be @parent8(i8* nonnull %a, i8* %bogus1, i8* nonnull %b) -; ATTRIBUTOR-LABEL: @parent8(i8* %a, i8* nocapture %bogus1, i8* %b) +; ATTRIBUTOR-LABEL: @parent8(i8* %a, i8* nocapture readnone %bogus1, i8* %b) ; BOTH-NEXT: entry: ; FNATTR-NEXT: invoke void @use2nonnull(i8* %a, i8* %b) ; ATTRIBUTOR-NEXT: invoke void @use2nonnull(i8* nonnull %a, i8* nonnull %b) @@ -458,7 +458,7 @@ define i32* @g1() { ret i32* %c } -; ATTRIBUTOR: define internal void @called_by_weak(i32* nocapture nonnull %a) +; ATTRIBUTOR: define internal void @called_by_weak(i32* nocapture nonnull readnone %a) define internal void @called_by_weak(i32* %a) { ret void } diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse.ll b/llvm/test/Transforms/FunctionAttrs/norecurse.ll index ed086341c4719..572cd22119a23 100644 --- a/llvm/test/Transforms/FunctionAttrs/norecurse.ll +++ b/llvm/test/Transforms/FunctionAttrs/norecurse.ll @@ -4,14 +4,14 @@ ; CHECK: Function Attrs ; 
CHECK-SAME: norecurse nounwind readnone -; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind willreturn +; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; BOTH-NEXT: define i32 @leaf() define i32 @leaf() { ret i32 1 } ; BOTH: Function Attrs -; CHECK-SAME: readnone +; BOTH-SAME: readnone ; BOTH-NOT: norecurse ; BOTH-NEXT: define i32 @self_rec() define i32 @self_rec() { @@ -20,7 +20,7 @@ define i32 @self_rec() { } ; BOTH: Function Attrs -; CHECK-SAME: readnone +; BOTH-SAME: readnone ; BOTH-NOT: norecurse ; BOTH-NEXT: define i32 @indirect_rec() define i32 @indirect_rec() { @@ -28,7 +28,7 @@ define i32 @indirect_rec() { ret i32 %a } ; BOTH: Function Attrs -; CHECK-SAME: readnone +; BOTH-SAME: readnone ; BOTH-NOT: norecurse ; BOTH-NEXT: define i32 @indirect_rec2() define i32 @indirect_rec2() { @@ -37,7 +37,7 @@ define i32 @indirect_rec2() { } ; BOTH: Function Attrs -; CHECK-SAME: readnone +; BOTH-SAME: readnone ; BOTH-NOT: norecurse ; BOTH-NEXT: define i32 @extern() define i32 @extern() { @@ -53,7 +53,7 @@ declare i32 @k() readnone ; CHECK-SAME: nounwind ; BOTH-NOT: norecurse ; CHECK-NEXT: define void @intrinsic(i8* nocapture %dest, i8* nocapture readonly %src, i32 %len) -; ATTRIBUTOR-NEXT: define void @intrinsic(i8* nocapture %dest, i8* nocapture %src, i32 %len) +; ATTRIBUTOR-NEXT: define void @intrinsic(i8* nocapture writeonly %dest, i8* nocapture readonly %src, i32 %len) define void @intrinsic(i8* %dest, i8* %src, i32 %len) { call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false) ret void @@ -66,7 +66,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) ; BOTH: Function Attrs ; CHECK-SAME: norecurse readnone ; FIXME: missing "norecurse" -; ATTRIBUTOR-SAME: nosync +; ATTRIBUTOR-SAME: nosync readnone ; CHECK-NEXT: define internal i32 @called_by_norecurse() define internal i32 @called_by_norecurse() { %a = call i32 @k() @@ -138,7 +138,7 @@ define i32 @eval_func(i32 (i32)* , i32) 
local_unnamed_addr { declare void @unknown() ; Call an unknown function in a dead block. -; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind willreturn +; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; ATTRIBUTOR: define i32 @call_unknown_in_dead_block() define i32 @call_unknown_in_dead_block() local_unnamed_addr { ret i32 0 diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll index d948048db129f..353835a90062d 100644 --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -28,7 +28,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; FNATTR: Function Attrs: norecurse nounwind optsize readnone ssp uwtable ; FNATTR-NEXT: define nonnull i32* @foo(%struct.ST* readnone %s) ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind optsize readnone ssp uwtable -; ATTRIBUTOR-NEXT: define nonnull i32* @foo(%struct.ST* "no-capture-maybe-returned" %s) +; ATTRIBUTOR-NEXT: define nonnull i32* @foo(%struct.ST* readnone "no-capture-maybe-returned" %s) define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { entry: %arrayidx = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13 @@ -61,7 +61,7 @@ define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtabl ; FNATTR: Function Attrs: nofree norecurse nounwind uwtable ; FNATTR-NEXT: define void @store_monotonic(i32* nocapture %0) ; ATTRIBUTOR: Function Attrs: nofree norecurse nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define void @store_monotonic(i32* nocapture %0) +; ATTRIBUTOR-NEXT: define void @store_monotonic(i32* nocapture writeonly %0) define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable { store atomic i32 10, i32* %0 monotonic, align 4 ret void @@ -94,7 +94,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable ; FNATTR-NEXT: define void 
@load_release(i32* nocapture %0) ; ATTRIBUTOR: Function Attrs: nofree norecurse nounwind uwtable ; ATTRIBUTOR-NOT: nosync -; ATTRIBUTOR-NEXT: define void @load_release(i32* nocapture %0) +; ATTRIBUTOR-NEXT: define void @load_release(i32* nocapture writeonly %0) define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { store atomic volatile i32 10, i32* %0 release, align 4 ret void @@ -106,7 +106,7 @@ define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { ; FNATTR-NEXT: define void @load_volatile_release(i32* nocapture %0) ; ATTRIBUTOR: Function Attrs: nofree norecurse nounwind uwtable ; ATTRIBUTOR-NOT: nosync -; ATTRIBUTOR-NEXT: define void @load_volatile_release(i32* nocapture %0) +; ATTRIBUTOR-NEXT: define void @load_volatile_release(i32* nocapture writeonly %0) define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable { store atomic volatile i32 10, i32* %0 release, align 4 ret void @@ -185,8 +185,8 @@ define void @call_might_sync() nounwind uwtable noinline { ; FNATTR: Function Attrs: nofree noinline nounwind uwtable ; FNATTR-NEXT: define i32 @scc1(i32* %0) -; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define i32 @scc1(i32* nocapture %0) +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define i32 @scc1(i32* nocapture readnone %0) define i32 @scc1(i32* %0) noinline nounwind uwtable { tail call void @scc2(i32* %0); %val = tail call i32 @volatile_load(i32* %0); @@ -195,8 +195,8 @@ define i32 @scc1(i32* %0) noinline nounwind uwtable { ; FNATTR: Function Attrs: nofree noinline nounwind uwtable ; FNATTR-NEXT: define void @scc2(i32* %0) -; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind uwtable -; ATTRIBUTOR-NEXT: define void @scc2(i32* nocapture %0) +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void 
@scc2(i32* nocapture readnone %0) define void @scc2(i32* %0) noinline nounwind uwtable { tail call i32 @scc1(i32* %0); ret void; @@ -224,7 +224,7 @@ define void @scc2(i32* %0) noinline nounwind uwtable { ; FNATTR: Function Attrs: nofree norecurse nounwind ; FNATTR-NEXT: define void @foo1(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) ; ATTRIBUTOR-NOT: nosync -; ATTRIBUTOR: define void @foo1(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) +; ATTRIBUTOR: define void @foo1(i32* nocapture writeonly %0, %"struct.std::atomic"* nocapture writeonly %1) define void @foo1(i32* %0, %"struct.std::atomic"* %1) { store i32 100, i32* %0, align 4 fence release @@ -236,7 +236,7 @@ define void @foo1(i32* %0, %"struct.std::atomic"* %1) { ; FNATTR: Function Attrs: nofree norecurse nounwind ; FNATTR-NEXT: define void @bar(i32* nocapture readnone %0, %"struct.std::atomic"* nocapture readonly %1) ; ATTRIBUTOR-NOT: nosync -; ATTRIBUTOR: define void @bar(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) +; ATTRIBUTOR: define void @bar(i32* nocapture readnone %0, %"struct.std::atomic"* nocapture readonly %1) define void @bar(i32* %0, %"struct.std::atomic"* %1) { %3 = getelementptr inbounds %"struct.std::atomic", %"struct.std::atomic"* %1, i64 0, i32 0, i32 0 br label %4 @@ -256,7 +256,7 @@ define void @bar(i32* %0, %"struct.std::atomic"* %1) { ; FNATTR: Function Attrs: nofree norecurse nounwind ; FNATTR-NEXT: define void @foo1_singlethread(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) ; ATTRIBUTOR: Function Attrs: nofree nosync -; ATTRIBUTOR: define void @foo1_singlethread(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) +; ATTRIBUTOR: define void @foo1_singlethread(i32* nocapture writeonly %0, %"struct.std::atomic"* nocapture writeonly %1) define void @foo1_singlethread(i32* %0, %"struct.std::atomic"* %1) { store i32 100, i32* %0, align 4 fence syncscope("singlethread") release @@ -268,7 +268,7 @@ define void @foo1_singlethread(i32* %0, 
%"struct.std::atomic"* %1) { ; FNATTR: Function Attrs: nofree norecurse nounwind ; FNATTR-NEXT: define void @bar_singlethread(i32* nocapture readnone %0, %"struct.std::atomic"* nocapture readonly %1) ; ATTRIBUTOR: Function Attrs: nofree nosync -; ATTRIBUTOR: define void @bar_singlethread(i32* nocapture %0, %"struct.std::atomic"* nocapture %1) +; ATTRIBUTOR: define void @bar_singlethread(i32* nocapture readnone %0, %"struct.std::atomic"* nocapture readonly %1) define void @bar_singlethread(i32* %0, %"struct.std::atomic"* %1) { %3 = getelementptr inbounds %"struct.std::atomic", %"struct.std::atomic"* %1, i64 0, i32 0, i32 0 br label %4 @@ -293,7 +293,7 @@ declare void @llvm.memset(i8* %dest, i8 %val, i32 %len, i1 %isvolatile) ; ; ATTRIBUTOR: Function Attrs: nounwind ; ATTRIBUTOR-NOT: nosync -; ATTRIBUTOR-NEXT: define i32 @memcpy_volatile(i8* nocapture %ptr1, i8* nocapture %ptr2) +; ATTRIBUTOR-NEXT: define i32 @memcpy_volatile(i8* nocapture writeonly %ptr1, i8* nocapture readonly %ptr2) define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { call void @llvm.memcpy(i8* %ptr1, i8* %ptr2, i32 8, i1 1) ret i32 4 @@ -304,7 +304,7 @@ define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { ; It is odd to add nocapture but a result of the llvm.memset nocapture. 
; ; ATTRIBUTOR: Function Attrs: nosync -; ATTRIBUTOR-NEXT: define i32 @memset_non_volatile(i8* nocapture %ptr1, i8 %val) +; ATTRIBUTOR-NEXT: define i32 @memset_non_volatile(i8* nocapture writeonly %ptr1, i8 %val) define i32 @memset_non_volatile(i8* %ptr1, i8 %val) { call void @llvm.memset(i8* %ptr1, i8 %val, i32 8, i1 0) ret i32 4 diff --git a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll index 4b2834ef56064..4a68627c574fe 100644 --- a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll @@ -102,7 +102,7 @@ return: ; preds = %if.end, %if.then } ; CHECK: Function Attrs: nofree norecurse nosync nounwind -; CHECK-NEXT: define i32* @external_sink_ret2_nrw(i32* readnone %n0, i32* nocapture readonly %r0, i32* returned "no-capture-maybe-returned" %w0) +; CHECK-NEXT: define i32* @external_sink_ret2_nrw(i32* readnone %n0, i32* nocapture readonly %r0, i32* returned writeonly "no-capture-maybe-returned" %w0) define i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { entry: %tobool = icmp ne i32* %n0, null diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 0521986bd1bb8..a97f49901b215 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -1,19 +1,24 @@ -; RUN: opt < %s -functionattrs -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s +; RUN: opt < %s -functionattrs -S | FileCheck %s --check-prefixes=CHECK,FNATTR +; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s --check-prefixes=CHECK,FNATTR +; RUN: opt < %s -attributor -attributor-disable=false -S | FileCheck %s --check-prefixes=CHECK,ATTRIBUTOR +; RUN: opt < %s -aa-pipeline=basic-aa -passes='attributor' 
-attributor-disable=false -S | FileCheck %s --check-prefixes=CHECK,ATTRIBUTOR + @x = global i32 0 declare void @test1_1(i8* %x1_1, i8* readonly %y1_1, ...) ; NOTE: readonly for %y1_2 would be OK here but not for the similar situation in test13. ; -; CHECK: define void @test1_2(i8* %x1_2, i8* readonly %y1_2, i8* %z1_2) +; FNATTR: define void @test1_2(i8* %x1_2, i8* readonly %y1_2, i8* %z1_2) +; ATTRIBUTOR: define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) { call void (i8*, i8*, ...) @test1_1(i8* %x1_2, i8* %y1_2, i8* %z1_2) store i32 0, i32* @x ret void } -; CHECK: define i8* @test2(i8* readnone returned %p) +; FNATTR: define i8* @test2(i8* readnone returned %p) +; ATTRIBUTOR: define i8* @test2(i8* readnone returned %p) define i8* @test2(i8* %p) { store i32 0, i32* @x ret i8* %p @@ -33,7 +38,8 @@ define void @test4_2(i8* %p) { ret void } -; CHECK: define void @test5(i8** nocapture %p, i8* %q) +; FNATTR: define void @test5(i8** nocapture %p, i8* %q) +; ATTRIBUTOR: define void @test5(i8** nocapture writeonly %p, i8* %q) ; Missed optz'n: we could make %q readnone, but don't break test6! define void @test5(i8** %p, i8* %q) { store i8* %q, i8** %p @@ -41,7 +47,8 @@ define void @test5(i8** %p, i8* %q) { } declare void @test6_1() -; CHECK: define void @test6_2(i8** nocapture %p, i8* %q) +; FNATTR: define void @test6_2(i8** nocapture %p, i8* %q) +; ATTRIBUTOR: define void @test6_2(i8** nocapture writeonly %p, i8* %q) ; This is not a missed optz'n. 
define void @test6_2(i8** %p, i8* %q) { store i8* %q, i8** %p @@ -49,19 +56,22 @@ define void @test6_2(i8** %p, i8* %q) { ret void } -; CHECK: define void @test7_1(i32* inalloca nocapture %a) +; FNATTR: define void @test7_1(i32* inalloca nocapture %a) +; ATTRIBUTOR: define void @test7_1(i32* inalloca nocapture writeonly %a) ; inalloca parameters are always considered written define void @test7_1(i32* inalloca %a) { ret void } -; CHECK: define i32* @test8_1(i32* readnone returned %p) +; FNATTR: define i32* @test8_1(i32* readnone returned %p) +; ATTRIBUTOR: define i32* @test8_1(i32* readnone returned %p) define i32* @test8_1(i32* %p) { entry: ret i32* %p } -; CHECK: define void @test8_2(i32* %p) +; FNATTR: define void @test8_2(i32* %p) +; ATTRIBUTOR: define void @test8_2(i32* nocapture writeonly %p) define void @test8_2(i32* %p) { entry: %call = call i32* @test8_1(i32* %p) @@ -115,18 +125,21 @@ define i32 @volatile_load(i32* %p) { ret i32 %load } -declare void @escape_readonly_ptr(i8** %addr, i8* readnone %ptr) -declare void @escape_readnone_ptr(i8** %addr, i8* readonly %ptr) +declare void @escape_readnone_ptr(i8** %addr, i8* readnone %ptr) +declare void @escape_readonly_ptr(i8** %addr, i8* readonly %ptr) ; The argument pointer %escaped_then_written cannot be marked readnone/only even ; though the only direct use, in @escape_readnone_ptr/@escape_readonly_ptr, ; is marked as readnone/only. However, the functions can write the pointer into ; %addr, causing the store to write to %escaped_then_written. ; -; FIXME: This test currently exposes a bug! +; FIXME: This test currently exposes a bug in functionattrs! 
+; +; FNATTR: define void @unsound_readnone(i8* nocapture readnone %ignored, i8* readnone %escaped_then_written) +; FNATTR: define void @unsound_readonly(i8* nocapture readnone %ignored, i8* readonly %escaped_then_written) ; -; BUG: define void @unsound_readnone(i8* %ignored, i8* readnone %escaped_then_written) -; BUG: define void @unsound_readonly(i8* %ignored, i8* readonly %escaped_then_written) +; ATTRIBUTOR: define void @unsound_readnone(i8* nocapture readnone %ignored, i8* %escaped_then_written) +; ATTRIBUTOR: define void @unsound_readonly(i8* nocapture readnone %ignored, i8* %escaped_then_written) define void @unsound_readnone(i8* %ignored, i8* %escaped_then_written) { %addr = alloca i8* call void @escape_readnone_ptr(i8** %addr, i8* %escaped_then_written) diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll index a1f28e03f191d..2528382d9bd4f 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll @@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; TEST 1 (positive case) ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable ; FNATTR-NEXT: define void @only_return() -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable willreturn +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn ; ATTRIBUTOR-NEXT: define void @only_return() define void @only_return() #0 { ret void @@ -28,7 +28,7 @@ define void @only_return() #0 { ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NEXT: define i32 @fib(i32 %0) ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define i32 @fib(i32 %0) local_unnamed_addr define i32 @fib(i32 %0) local_unnamed_addr #0 { %2 = icmp slt i32 %0, 2 @@ 
-59,7 +59,7 @@ define i32 @fib(i32 %0) local_unnamed_addr #0 { ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable ; FNATTR-NOT: willreturn ; FNATTR-NEXT: define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { @@ -95,7 +95,7 @@ define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { ; FIXME: missing willreturn ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable ; FNATTR-NEXT: define i32 @fact_loop(i32 %0) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define i32 @fact_loop(i32 %0) local_unnamed_addr define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { %2 = icmp slt i32 %0, 1 @@ -126,7 +126,7 @@ define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NOT: willreturn ; FNATTR-NEXT: define void @mutual_recursion1(i1 %c) -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define void @mutual_recursion1(i1 %c) define void @mutual_recursion1(i1 %c) #0 { @@ -142,7 +142,7 @@ end: ; FNATTR: Function Attrs: noinline nounwind readnone uwtable ; FNATTR-NOT: willreturn ; FNATTR-NEXT: define void @mutual_recursion2(i1 %c) -; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define void 
@mutual_recursion2(i1 %c) define void @mutual_recursion2(i1 %c) #0 { @@ -216,10 +216,10 @@ define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_ ; ATTRIBUTOR-NEXT: declare float @llvm.floor.f32(float) declare float @llvm.floor.f32(float) -; FNATTRS: Function Attrs: noinline nounwind uwtable +; FNATTRS: Function Attrs: noinline nounwind readnone uwtable ; FNATTRS-NEXT: define void @call_floor(float %a) ; FIXME: missing willreturn -; ATTRIBUTOR: Function Attrs: noinline nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: noinline nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define void @call_floor(float %a) define void @call_floor(float %a) #0 { tail call float @llvm.floor.f32(float %a) @@ -337,7 +337,7 @@ declare i32 @__gxx_personality_v0(...) ; FIXME: missing willreturn ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable ; FNATTR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture readonly %0) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture readonly %0) define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { br label %3 @@ -370,7 +370,7 @@ define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable ; FNATTR-NOT: willreturn ; FNATTR-NEXT: define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* 
nocapture readonly %2, i32 %3) local_unnamed_addr #0 { @@ -408,7 +408,7 @@ define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, ; FIXME: missing willreturn ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable ; FNATTR-NEXT: define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readonly uwtable ; ATTRIBUTOR-NEXT: define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 { @@ -439,7 +439,7 @@ define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable ; FNATTR-NEXT: define i32 @multiple_return(i32 %a) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable willreturn +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn ; ATTRIBUTOR-NEXT: define i32 @multiple_return(i32 %a) define i32 @multiple_return(i32 %a) #0 { %b = icmp eq i32 %a, 0 @@ -471,7 +471,7 @@ unreachable_label: ; FIXME: missing willreturn ; FNATTR: Function Attrs: noinline nounwind uwtable ; FNATTR-NEXT: define i32 @unreachable_exit_positive2(i32 %0) -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind uwtable +; ATTRIBUTOR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable ; ATTRIBUTOR-NEXT: define i32 @unreachable_exit_positive2(i32 %0) define i32 @unreachable_exit_positive2(i32) local_unnamed_addr #0 { %2 = icmp slt i32 %0, 1 @@ -515,7 +515,7 @@ unreachable_label: ; FNATTR: Function Attrs: noinline nounwind uwtable ; FNATTR-NOT: willreturn ; FNATTR-NEXT: define void @unreachable_exit_negative2() -; ATTRIBUTOR: Function Attrs: nofree noinline norecurse noreturn nosync nounwind uwtable +; 
ATTRIBUTOR: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable ; ATTRIBUTOR-NOT: willreturn ; ATTRIBUTOR-NEXT: define void @unreachable_exit_negative2() define void @unreachable_exit_negative2() #0 { From f5d700ac05cb3b3fdb22619186ce9f0376dcca10 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 7 Oct 2019 21:14:22 +0000 Subject: [PATCH 166/254] [llvm-lipo] Relax the check of the specified input file architecture cctools lipo only compares the cputypes when it verifies that the specified (via -arch) input file and the architecture match. This diff adjusts the behavior of llvm-lipo accordingly. Differential revision: https://reviews.llvm.org/D68319 Test plan: make check-all llvm-svn: 373966 --- llvm/tools/llvm-lipo/llvm-lipo.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 5eb3332c02e05..e746db414058d 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/WithColor.h" +#include "llvm/TextAPI/MachO/Architecture.h" using namespace llvm; using namespace llvm::object; @@ -438,14 +439,19 @@ readInputBinaries(ArrayRef InputFiles) { if (!B->isArchive() && !B->isMachO() && !B->isMachOUniversalBinary()) reportError("File " + IF.FileName + " has unsupported binary format"); if (IF.ArchType && (B->isMachO() || B->isArchive())) { - const auto ArchType = - B->isMachO() ? Slice(cast(B)).getArchString() - : Slice(cast(B)).getArchString(); - if (Triple(*IF.ArchType).getArch() != Triple(ArchType).getArch()) + const auto S = B->isMachO() ? 
Slice(cast(B)) + : Slice(cast(B)); + const auto SpecifiedCPUType = + MachO::getCPUTypeFromArchitecture( + MachO::mapToArchitecture(Triple(*IF.ArchType))) + .first; + // For compatibility with cctools' lipo the comparison is relaxed just to + // checking cputypes. + if (S.getCPUType() != SpecifiedCPUType) reportError("specified architecture: " + *IF.ArchType + " for file: " + B->getFileName() + - " does not match the file's architecture (" + ArchType + - ")"); + " does not match the file's architecture (" + + S.getArchString() + ")"); } InputBinaries.push_back(std::move(*BinaryOrErr)); } From 58af5be28dafedafba3fb20ba35ac0ae4f2c570a Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 7 Oct 2019 21:14:45 +0000 Subject: [PATCH 167/254] [WebAssembly] Add memory intrinsics handling to mayThrow() Summary: Previously, `WebAssembly::mayThrow()` assumed all inputs are global addresses. But when intrinsics, such as `memcpy`, `memmove`, or `memset` are lowered to external symbols in instruction selection and later emitted as library calls. And these functions don't throw. This patch adds handling to those memory intrinsics to `mayThrow` function. But while most of libcalls don't throw, we can't guarantee all of them don't throw, so currently we conservatively return true for all other external symbols. I think a better way to solve this problem is to embed 'nounwind' info in `TargetLowering::CallLoweringInfo`, so that we can access the info from the backend. This will also enable transferring 'nounwind' properties of LLVM IR instructions. Currently we don't transfer that info and we can only access properties of callee functions, if the callees are within the module. Other targets don't need this info in the backend because they do all the processing before isel, but it will help us because that info will reduce code size increase in fixing unwind destination mismatches in CFGStackify. 
But for now we return false for these memory intrinsics and true for all other libcalls conservatively. Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68553 llvm-svn: 373967 --- .../WebAssembly/WebAssemblyUtilities.cpp | 16 ++++++- .../CodeGen/WebAssembly/cfg-stackify-eh.ll | 46 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index 81c16f0ed21f3..a237da8154ab7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -50,7 +50,21 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { return false; const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode())); - assert(MO.isGlobal()); + assert(MO.isGlobal() || MO.isSymbol()); + + if (MO.isSymbol()) { + // Some intrinsics are lowered to calls to external symbols, which are then + // lowered to calls to library functions. Most of libcalls don't throw, but + // we only list some of them here now. + // TODO Consider adding 'nounwind' info in TargetLowering::CallLoweringInfo + // instead for more accurate info. 
+ const char *Name = MO.getSymbolName(); + if (strcmp(Name, "memcpy") == 0 || strcmp(Name, "memmove") == 0 || + strcmp(Name, "memset") == 0) + return false; + return true; + } + const auto *F = dyn_cast(MO.getGlobal()); if (!F) return true; diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 191c3d785521f..c79eb935179f0 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -664,11 +664,51 @@ if.end: ; preds = %cont, %catch.start, ret void } +%class.Object = type { i8 } + +; Intrinsics like memcpy, memmove, and memset don't throw and are lowered into +; calls to external symbols (not global addresses) in instruction selection, +; which will be eventually lowered to library function calls. +; Because this test runs with -wasm-disable-ehpad-sort, these library calls in +; invoke.cont BB fall within try~end_try, but they shouldn't cause crashes or +; unwinding destination mismatches in CFGStackify. 
+ +; NOSORT-LABEL: test10 +; NOSORT: try +; NOSORT: call foo +; NOSORT: i32.call {{.*}} memcpy +; NOSORT: i32.call {{.*}} memmove +; NOSORT: i32.call {{.*}} memset +; NOSORT: return +; NOSORT: catch +; NOSORT: rethrow +; NOSORT: end_try +define void @test10(i8* %a, i8* %b) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %o = alloca %class.Object, align 1 + invoke void @foo() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %entry + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i1 false) + call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i1 false) + %call = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %o) #1 + ret void + +ehcleanup: ; preds = %entry + %0 = cleanuppad within none [] + %call2 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %o) #1 [ "funclet"(token %0) ] + cleanupret from %0 unwind to caller +} + declare void @foo() declare void @bar() declare i32 @baz() ; Function Attrs: nounwind declare void @nothrow(i32) #0 +; Function Attrs: nounwind +declare %class.Object* @_ZN6ObjectD2Ev(%class.Object* returned) #0 declare i32 @__gxx_wasm_personality_v0(...) 
declare i8* @llvm.wasm.get.exception(token) declare i32 @llvm.wasm.get.ehselector(token) @@ -678,5 +718,11 @@ declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() declare void @__clang_call_terminate(i8*) declare void @_ZSt9terminatev() +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) #0 +; Function Attrs: nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1 immarg) #0 +; Function Attrs: nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #0 attributes #0 = { nounwind } From 69a3b21a5cd860a66fdea188c561f3336d3d5f66 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 7 Oct 2019 21:23:19 +0000 Subject: [PATCH 168/254] Mark constructor as default and remove implementation (NFC) llvm-svn: 373968 --- lldb/include/lldb/Core/Mangled.h | 2 +- lldb/source/Core/Mangled.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h index fb52afd6ed346..b60b0dd7c950f 100644 --- a/lldb/include/lldb/Core/Mangled.h +++ b/lldb/include/lldb/Core/Mangled.h @@ -49,7 +49,7 @@ class Mangled { /// Default constructor. /// /// Initialize with both mangled and demangled names empty. - Mangled(); + Mangled() = default; /// Construct with name. /// diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp index 944c7b0bcfee1..1153d05331f29 100644 --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -124,8 +124,6 @@ get_demangled_name_without_arguments(ConstString mangled, } #pragma mark Mangled -// Default constructor -Mangled::Mangled() : m_mangled(), m_demangled() {} // Constructor with an optional string and a boolean indicating if it is the // mangled version. 
From 60786f9143926e86525abaa917494c406493534a Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 7 Oct 2019 21:33:39 +0000 Subject: [PATCH 169/254] [llvm-c] Add UnaryOperator to LLVM_FOR_EACH_VALUE_SUBCLASS macro Note that we are not sure where the tests for these functions lives. This was discussed in the Phab Diff. Differential Revision: https://reviews.llvm.org/D68588 llvm-svn: 373969 --- llvm/include/llvm-c/Core.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 815541c65d8cc..b84970956666a 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -1543,6 +1543,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(GlobalVariable) \ macro(UndefValue) \ macro(Instruction) \ + macro(UnaryOperator) \ macro(BinaryOperator) \ macro(CallInst) \ macro(IntrinsicInst) \ From 33f054a316672b1ef54c9f4e6960ac1e7e5acbf5 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 7 Oct 2019 21:38:30 +0000 Subject: [PATCH 170/254] [CMake] We only want to copy the headers for macOS. llvm-svn: 373970 --- lldb/cmake/modules/LLDBFramework.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake index 638515659f356..249fea30a8749 100644 --- a/lldb/cmake/modules/LLDBFramework.cmake +++ b/lldb/cmake/modules/LLDBFramework.cmake @@ -86,11 +86,13 @@ add_dependencies(liblldb liblldb-resource-headers) # At build time, copy the staged headers into the framework bundle (and do # some post-processing in-place). +if (NOT IOS) add_custom_command(TARGET liblldb POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${lldb_header_staging} $/Headers COMMAND ${LLDB_SOURCE_DIR}/scripts/framework-header-fix.sh $/Headers ${LLDB_VERSION} COMMENT "LLDB.framework: copy framework headers" ) +endif() # Copy vendor-specific headers from clang (without staging). 
if(NOT IOS) From a9d43b55c7d2f40b42a1aae7f84917d13121fce3 Mon Sep 17 00:00:00 2001 From: Zoe Carver Date: Mon, 7 Oct 2019 21:41:15 +0000 Subject: [PATCH 171/254] [libc++] Remove C++03 variadics in shared_ptr (v2) Summary: In my last patch (D67675) I forgot a few variadics. This patch removes the remaining make_shared and allocate_shared C++03 variadics. Reviewers: ldionne, EricWF, mclow.lists Subscribers: christof, dexonsmith, libcxx-commits Tags: #libc Differential Revision: https://reviews.llvm.org/D68000 llvm-svn: 373971 --- libcxx/include/memory | 70 ------------------------------------------- 1 file changed, 70 deletions(-) diff --git a/libcxx/include/memory b/libcxx/include/memory index 0336a4d5d02d1..e4bde1eb7e32d 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -4413,8 +4413,6 @@ shared_ptr<_Tp>::reset(_Yp* __p, _Dp __d, _Alloc __a) shared_ptr(__p, __d, __a).swap(*this); } -#ifndef _LIBCPP_HAS_NO_VARIADICS - template inline _LIBCPP_INLINE_VISIBILITY typename enable_if @@ -4439,74 +4437,6 @@ allocate_shared(const _Alloc& __a, _Args&& ...__args) return shared_ptr<_Tp>::allocate_shared(__a, _VSTD::forward<_Args>(__args)...); } -#else // _LIBCPP_HAS_NO_VARIADICS - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -make_shared() -{ - return shared_ptr<_Tp>::make_shared(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -make_shared(_A0& __a0) -{ - return shared_ptr<_Tp>::make_shared(__a0); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -make_shared(_A0& __a0, _A1& __a1) -{ - return shared_ptr<_Tp>::make_shared(__a0, __a1); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -make_shared(_A0& __a0, _A1& __a1, _A2& __a2) -{ - return shared_ptr<_Tp>::make_shared(__a0, __a1, __a2); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -allocate_shared(const _Alloc& __a) -{ - return shared_ptr<_Tp>::allocate_shared(__a); -} - -template -inline _LIBCPP_INLINE_VISIBILITY 
-shared_ptr<_Tp> -allocate_shared(const _Alloc& __a, _A0& __a0) -{ - return shared_ptr<_Tp>::allocate_shared(__a, __a0); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -allocate_shared(const _Alloc& __a, _A0& __a0, _A1& __a1) -{ - return shared_ptr<_Tp>::allocate_shared(__a, __a0, __a1); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -shared_ptr<_Tp> -allocate_shared(const _Alloc& __a, _A0& __a0, _A1& __a1, _A2& __a2) -{ - return shared_ptr<_Tp>::allocate_shared(__a, __a0, __a1, __a2); -} - -#endif // _LIBCPP_HAS_NO_VARIADICS - template inline _LIBCPP_INLINE_VISIBILITY bool From ee33c61e341c23cbffe5c583107353d54fc67be8 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 21:48:08 +0000 Subject: [PATCH 172/254] [Attributor][FIX] Remove assertion wrong for on invalid IRPositions llvm-svn: 373972 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 517afe85ec4c4..f1bba68414525 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -398,8 +398,6 @@ struct IRPosition { assert(KindOrArgNo < 0 && "Expected (call site) arguments to never reach this point!"); - assert(!isa(getAnchorValue()) && - "Expected arguments to have an associated argument position!"); return Kind(KindOrArgNo); } From aaea76ba02301efd8aa0c8d5da4af400d03b2fb6 Mon Sep 17 00:00:00 2001 From: David Bolvansky Date: Mon, 7 Oct 2019 21:57:03 +0000 Subject: [PATCH 173/254] [Diagnostics] Emit better -Wbool-operation's warning message if we known that the result is always true llvm-svn: 373973 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 ++- clang/lib/Sema/SemaChecking.cpp | 7 +++++++ clang/lib/Sema/SemaExpr.cpp | 4 ---- clang/test/Sema/warn-bitwise-negation-bool.c | 6 +++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git 
a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c54380639a434..23cedcab27bf1 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6638,7 +6638,8 @@ def note_member_declared_here : Note< def note_member_first_declared_here : Note< "member %0 first declared here">; def warn_bitwise_negation_bool : Warning< - "bitwise negation of a boolean expression; did you mean logical negation?">, + "bitwise negation of a boolean expression%select{;| always evaluates to 'true';}0 " + "did you mean logical negation?">, InGroup>; def err_decrement_bool : Error<"cannot decrement expression of type bool">; def warn_increment_bool : Warning< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index eeddff6c7144a..de8e1ef87a997 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11896,6 +11896,13 @@ static void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC, if (E->isTypeDependent() || E->isValueDependent()) return; + if (const auto *UO = dyn_cast(E)) + if (UO->getOpcode() == UO_Not && + UO->getSubExpr()->isKnownToHaveBooleanValue()) + S.Diag(UO->getBeginLoc(), diag::warn_bitwise_negation_bool) + << OrigE->getSourceRange() << T->isBooleanType() + << FixItHint::CreateReplacement(UO->getBeginLoc(), "!"); + // For conditional operators, we analyze the arguments as if they // were being fed directly into the output. if (isa(E)) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3cb999dacc40b..f08b616809464 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13479,10 +13479,6 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, // C99 does not support '~' for complex conjugation. 
Diag(OpLoc, diag::ext_integer_complement_complex) << resultType << Input.get()->getSourceRange(); - else if (Input.get()->isKnownToHaveBooleanValue()) - Diag(OpLoc, diag::warn_bitwise_negation_bool) - << Input.get()->getSourceRange() - << FixItHint::CreateReplacement(OpLoc, "!"); else if (resultType->hasIntegerRepresentation()) break; else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) { diff --git a/clang/test/Sema/warn-bitwise-negation-bool.c b/clang/test/Sema/warn-bitwise-negation-bool.c index 435d783439c69..c74705bc765a2 100644 --- a/clang/test/Sema/warn-bitwise-negation-bool.c +++ b/clang/test/Sema/warn-bitwise-negation-bool.c @@ -12,13 +12,13 @@ typedef _Bool boolean; #endif void test(boolean b, int i) { - b = ~b; // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} + b = ~b; // expected-warning {{bitwise negation of a boolean expression always evaluates to 'true'; did you mean logical negation?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" - b = ~(b); // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} + b = ~(b); // expected-warning {{bitwise negation of a boolean expression always evaluates to 'true'; did you mean logical negation?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" b = ~i; i = ~b; // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" - b = ~(i > 4); // expected-warning {{bitwise negation of a boolean expression; did you mean logical negation?}} + b = ~(i > 4); // expected-warning {{bitwise negation of a boolean expression always evaluates to 'true'; did you mean logical negation?}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"!" 
} From ae5bad7277f497a8b19e331dffc14c74e3db7587 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 7 Oct 2019 22:11:30 +0000 Subject: [PATCH 174/254] [llvm-lipo] Add TextAPI to LINK_COMPONENTS Summary: D68319 uses `MachO::getCPUTypeFromArchitecture` and without this builds with `-DBUILD_SHARED_LIBS=ON` fail. Reviewers: alexshap Subscribers: mgorny, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68594 llvm-svn: 373974 --- llvm/tools/llvm-lipo/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-lipo/CMakeLists.txt b/llvm/tools/llvm-lipo/CMakeLists.txt index 992785723f03e..335b286dd5f5e 100644 --- a/llvm/tools/llvm-lipo/CMakeLists.txt +++ b/llvm/tools/llvm-lipo/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Object Option Support + TextAPI ) set(LLVM_TARGET_DEFINITIONS LipoOpts.td) From daeead4b02f062bf5a21e154ab7726b3e1dd41bd Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 7 Oct 2019 22:19:40 +0000 Subject: [PATCH 175/254] [WebAssembly] Fix unwind mismatch stat computation Summary: There was a bug when computing the number of unwind destination mismatches in CFGStackify. When there are many mismatched calls that share the same (original) destination BB, they have to be counted separately. This also fixes a typo and runs `fixUnwindMismatches` only when the wasm exception handling is enabled. This is to prevent unnecessary computations and does not change behavior. 
Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68552 llvm-svn: 373975 --- llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp | 8 +++++--- llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 6b2094679cea5..d2a35574e2fee 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -848,7 +848,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { SmallVector EHPadStack; // Range of intructions to be wrapped in a new nested try/catch using TryRange = std::pair; - // In original CFG, + // In original CFG, DenseMap> UnwindDestToTryRanges; // In new CFG, DenseMap> BrDestToTryRanges; @@ -985,7 +985,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // ... // cont: for (auto &P : UnwindDestToTryRanges) { - NumUnwindMismatches++; + NumUnwindMismatches += P.second.size(); // This means the destination is the appendix BB, which was separately // handled above. @@ -1300,7 +1300,9 @@ void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { } } // Fix mismatches in unwind destinations induced by linearizing the code. 
- fixUnwindMismatches(MF); + if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && + MF.getFunction().hasPersonalityFn()) + fixUnwindMismatches(MF); } void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index c79eb935179f0..f33f56701ebb8 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling | FileCheck %s ; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT-STAT target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" @@ -702,6 +703,9 @@ ehcleanup: ; preds = %entry cleanupret from %0 unwind to caller } +; Check if the unwind destination 
mismatch stats are correct +; NOSORT-STAT: 11 wasm-cfg-stackify - Number of EH pad unwind mismatches found + declare void @foo() declare void @bar() declare i32 @baz() From f9b67b810e2d413606b0d5891ed9477d819bcab9 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 7 Oct 2019 22:28:58 +0000 Subject: [PATCH 176/254] [X86] Add new calling convention that guarantees tail call optimization When the target option GuaranteedTailCallOpt is specified, calls with the fastcc calling convention will be transformed into tail calls if they are in tail position. This diff adds a new calling convention, tailcc, currently supported only on X86, which behaves the same way as fastcc, except that the GuaranteedTailCallOpt flag does not need to enabled in order to enable tail call optimization. Patch by Dwight Guth ! Reviewed By: lebedev.ri, paquette, rnk Differential Revision: https://reviews.llvm.org/D67855 llvm-svn: 373976 --- llvm/docs/BitCodeFormat.rst | 1 + llvm/docs/CodeGenerator.rst | 4 +- llvm/docs/LangRef.rst | 17 +- llvm/include/llvm/IR/CallingConv.h | 5 + llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 2 + llvm/lib/AsmParser/LLToken.h | 1 + llvm/lib/CodeGen/Analysis.cpp | 3 +- llvm/lib/IR/AsmWriter.cpp | 1 + llvm/lib/Target/X86/X86CallingConv.td | 2 + llvm/lib/Target/X86/X86FastISel.cpp | 10 +- llvm/lib/Target/X86/X86FrameLowering.cpp | 3 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 ++- llvm/lib/Target/X86/X86Subtarget.h | 1 + llvm/test/CodeGen/X86/musttail-tailcc.ll | 114 +++++++++++++ llvm/test/CodeGen/X86/tailcall-tailcc.ll | 155 ++++++++++++++++++ llvm/test/CodeGen/X86/tailcc-calleesave.ll | 19 +++ .../CodeGen/X86/tailcc-disable-tail-calls.ll | 40 +++++ llvm/test/CodeGen/X86/tailcc-fastcc.ll | 49 ++++++ llvm/test/CodeGen/X86/tailcc-fastisel.ll | 18 ++ llvm/test/CodeGen/X86/tailcc-largecode.ll | 71 ++++++++ llvm/test/CodeGen/X86/tailcc-stackalign.ll | 23 +++ llvm/test/CodeGen/X86/tailcc-structret.ll | 7 + 
llvm/test/CodeGen/X86/tailccbyval.ll | 21 +++ llvm/test/CodeGen/X86/tailccbyval64.ll | 42 +++++ llvm/test/CodeGen/X86/tailccfp.ll | 6 + llvm/test/CodeGen/X86/tailccfp2.ll | 27 +++ llvm/test/CodeGen/X86/tailccpic1.ll | 16 ++ llvm/test/CodeGen/X86/tailccpic2.ll | 15 ++ llvm/test/CodeGen/X86/tailccstack64.ll | 28 ++++ llvm/utils/vim/syntax/llvm.vim | 1 + 31 files changed, 703 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/X86/musttail-tailcc.ll create mode 100644 llvm/test/CodeGen/X86/tailcall-tailcc.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-calleesave.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-fastcc.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-fastisel.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-largecode.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-stackalign.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-structret.ll create mode 100644 llvm/test/CodeGen/X86/tailccbyval.ll create mode 100644 llvm/test/CodeGen/X86/tailccbyval64.ll create mode 100644 llvm/test/CodeGen/X86/tailccfp.ll create mode 100644 llvm/test/CodeGen/X86/tailccfp2.ll create mode 100644 llvm/test/CodeGen/X86/tailccpic1.ll create mode 100644 llvm/test/CodeGen/X86/tailccpic2.ll create mode 100644 llvm/test/CodeGen/X86/tailccstack64.ll diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst index 4e653ae55d535..dce84620fd7b0 100644 --- a/llvm/docs/BitCodeFormat.rst +++ b/llvm/docs/BitCodeFormat.rst @@ -794,6 +794,7 @@ function. 
The operand fields are: * ``preserve_allcc``: code 15 * ``swiftcc`` : code 16 * ``cxx_fast_tlscc``: code 17 + * ``tailcc`` : code 18 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/llvm/docs/CodeGenerator.rst b/llvm/docs/CodeGenerator.rst index 343b9879972fc..75330a5df3baa 100644 --- a/llvm/docs/CodeGenerator.rst +++ b/llvm/docs/CodeGenerator.rst @@ -2068,12 +2068,12 @@ supported on x86/x86-64, PowerPC, and WebAssembly. It is performed on x86/x86-64 and PowerPC if: * Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC - calling convention) or ``cc 11`` (HiPE calling convention). + calling convention), ``cc 11`` (HiPE calling convention), or ``tailcc``. * The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void). -* Option ``-tailcallopt`` is enabled. +* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``. * Platform-specific constraints are met. diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d9a38907c920a..e797b1f9a15d8 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -299,7 +299,7 @@ added in the future: allows the target to use whatever tricks it wants to produce fast code for the target, without having to conform to an externally specified ABI (Application Binary Interface). `Tail calls can only - be optimized when this, the GHC or the HiPE convention is + be optimized when this, the tailcc, the GHC or the HiPE convention is used. `_ This calling convention does not support varargs and requires the prototype of all callees to exactly match the prototype of the function definition. @@ -436,6 +436,14 @@ added in the future: - On X86-64 RCX and R8 are available for additional integer returns, and XMM2 and XMM3 are available for additional FP/vector returns. - On iOS platforms, we use AAPCS-VFP calling convention. 
+"``tailcc``" - Tail callable calling convention + This calling convention ensures that calls in tail position will always be + tail call optimized. This calling convention is equivalent to fastcc, + except for an additional guarantee that tail calls will be produced + whenever possible. `Tail calls can only be optimized when this, the fastcc, + the GHC or the HiPE convention is used. `_ This + calling convention does not support varargs and requires the prototype of + all callees to exactly match the prototype of the function definition. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific @@ -10232,11 +10240,12 @@ This instruction requires several arguments: Tail call optimization for calls marked ``tail`` is guaranteed to occur if the following conditions are met: - - Caller and callee both have the calling convention ``fastcc`` or ``tailcc``. - The call is in tail position (ret immediately follows call and ret uses value of call or is void). - - Option ``-tailcallopt`` is enabled, or - ``llvm::GuaranteedTailCallOpt`` is ``true``. + - Option ``-tailcallopt`` is enabled, + ``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention + is ``tailcc`` - `Platform-specific constraints are met. `_ diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index 6f4989268fa35..c1c979c2e2aba 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -75,6 +75,11 @@ namespace CallingConv { // CXX_FAST_TLS - Calling convention for access functions. CXX_FAST_TLS = 17, + /// Tail - This calling convention attempts to make calls as fast as + /// possible while guaranteeing that tail call optimization can always + /// be performed. + Tail = 18, + // Target - This is the start of the target-specific calling conventions, // e.g.
fastcall and thiscall on X86. FirstTargetCC = 64, diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 72d2357c29333..5292b0e627442 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); KEYWORD(amdgpu_kernel); + KEYWORD(tailcc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5ea0b7d39c170..9bb3ca145c2d4 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1955,6 +1955,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' /// ::= 'amdgpu_kernel' +/// ::= 'tailcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -2000,6 +2001,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; + case lltok::kw_tailcc: CC = CallingConv::Tail; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h index 0e9ba4db47427..f49feb2dc14d1 100644 --- a/llvm/lib/AsmParser/LLToken.h +++ b/llvm/lib/AsmParser/LLToken.h @@ -168,6 +168,7 @@ enum Kind { kw_amdgpu_ps, kw_amdgpu_cs, kw_amdgpu_kernel, + kw_tailcc, // Attributes: kw_attributes, diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 3ef90d32daf55..6c059665fca32 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -523,7 +523,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. 
if (!Ret && - (!TM.Options.GuaranteedTailCallOpt || !isa(Term))) + ((!TM.Options.GuaranteedTailCallOpt && + CS.getCallingConv() != CallingConv::Tail) || !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 3f140ba01d822..91f22dbb17afc 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PreserveAll: Out << "preserve_allcc"; break; case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; + case CallingConv::Tail: Out << "tailcc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 1c3034a5116ab..4c49d68bec99f 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -433,6 +433,7 @@ defm X86_SysV64_RegCall : def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, // If HiPE, use RetCC_X86_32_HiPE. 
CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, @@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 97abd084bf5d9..e5e089d07d55c 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && + CC != CallingConv::Tail && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && @@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Let SDISel handle vararg functions. 
@@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE) + CC == CallingConv::HiPE || CC == CallingConv::Tail) return 0; if (CS) @@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { default: return false; case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: @@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Don't know how to handle Win64 varargs yet. 
Nothing special needed for diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index af3a33ffd4e8b..fabc3e581ffe8 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2269,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || - CallingConvention == CallingConv::Fast) { + CallingConvention == CallingConv::Fast || + CallingConvention == CallingConv::Tail) { if (IsNested) report_fatal_error("Segmented stacks does not support fastcall with " "nested function."); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3806b0e233017..052300d6f7241 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2963,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM); + CC == CallingConv::HHVM || CC == CallingConv::Tail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -2989,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { /// Return true if the function is being made into a tailcall target by /// changing its ABI. 
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { - return GuaranteedTailCallOpt && canGuaranteeTCO(CC); + return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { @@ -3615,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; + bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || + CallConv == CallingConv::Tail; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); const auto *CI = dyn_cast_or_null(CLI.CS.getInstruction()); @@ -3635,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Attr.getValueAsString() == "true") isTailCall = false; - if (Subtarget.isPICStyleGOT() && - !MF.getTarget().Options.GuaranteedTailCallOpt) { + if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT // relocation, which forces early binding of the symbol. This breaks code @@ -3663,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Sibcalls are automatically detected tailcalls which do not require // ABI changes. - if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) + if (!IsGuaranteeTCO && isTailCall) IsSibcall = true; if (isTailCall) @@ -3695,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This is a sibcall. The memory operands are available in caller's // own caller's stack. 
NumBytes = 0; - else if (MF.getTarget().Options.GuaranteedTailCallOpt && - canGuaranteeTCO(CallConv)) + else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -4321,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); + bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || + CalleeCC == CallingConv::Tail; // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -4328,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( if (IsCalleeWin64 != IsCallerWin64) return false; - if (DAG.getTarget().Options.GuaranteedTailCallOpt) { + if (IsGuaranteeTCO) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; @@ -24421,6 +24423,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::Fast: + case CallingConv::Tail: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 4d7495641d923..b5b1c19c45564 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -815,6 +815,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { // On Win64, all these conventions just use the default convention. 
case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: diff --git a/llvm/test/CodeGen/X86/musttail-tailcc.ll b/llvm/test/CodeGen/X86/musttail-tailcc.ll new file mode 100644 index 0000000000000..6057045a77dfa --- /dev/null +++ b/llvm/test/CodeGen/X86/musttail-tailcc.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32 + +; tailcc will turn all of these musttail calls into tail calls. + +declare tailcc i32 @tailcallee(i32 %a1, i32 %a2) + +define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +; X64-LABEL: tailcaller: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp tailcallee # TAILCALL +; +; X32-LABEL: tailcaller: +; X32: # %bb.0: # %entry +; X32-NEXT: jmp tailcallee # TAILCALL +entry: + %tmp11 = musttail call tailcc i32 @tailcallee(i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare tailcc i8* @alias_callee() + +define tailcc noalias i8* @noalias_caller() nounwind { +; X64-LABEL: noalias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp alias_callee # TAILCALL +; +; X32-LABEL: noalias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp alias_callee # TAILCALL + %p = musttail call tailcc i8* @alias_callee() + ret i8* %p +} + +declare tailcc noalias i8* @noalias_callee() + +define tailcc i8* @alias_caller() nounwind { +; X64-LABEL: alias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp noalias_callee # TAILCALL +; +; X32-LABEL: alias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp noalias_callee # TAILCALL + %p = musttail call tailcc noalias i8* @noalias_callee() + ret i8* %p +} + +define tailcc void @void_test(i32, i32, i32, i32) { +; X64-LABEL: 
void_test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp void_test # TAILCALL +; +; X32-LABEL: void_test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp void_test # TAILCALL + entry: + musttail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) + ret void +} + +define tailcc i1 @i1test(i32, i32, i32, i32) { +; X64-LABEL: i1test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp i1test # TAILCALL +; +; X32-LABEL: i1test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp i1test # TAILCALL + entry: + %4 = musttail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret i1 %4 +} diff --git a/llvm/test/CodeGen/X86/tailcall-tailcc.ll b/llvm/test/CodeGen/X86/tailcall-tailcc.ll new file mode 100644 index 0000000000000..5a427034a7266 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcall-tailcc.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32 + +; With -tailcallopt, CodeGen guarantees a tail call optimization +; for all of these. + +declare tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) + +define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +; X64-LABEL: tailcaller: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: popq %rax +; X64-NEXT: jmp tailcallee # TAILCALL +; +; X32-LABEL: tailcaller: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $16, %esp +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: jmp tailcallee # TAILCALL +entry: + %tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare tailcc i8* @alias_callee() + +define tailcc noalias i8* @noalias_caller() nounwind { +; X64-LABEL: noalias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp alias_callee # TAILCALL +; +; X32-LABEL: noalias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp alias_callee # TAILCALL + %p = tail call tailcc i8* @alias_callee() + ret i8* %p +} + +declare tailcc noalias i8* @noalias_callee() + +define tailcc i8* @alias_caller() nounwind { +; X64-LABEL: alias_caller: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp noalias_callee # TAILCALL +; +; X32-LABEL: alias_caller: +; X32: # %bb.0: +; X32-NEXT: jmp noalias_callee # TAILCALL + %p = tail call tailcc noalias i8* @noalias_callee() + ret i8* %p +} + +declare tailcc i32 @i32_callee() + +define tailcc i32 @ret_undef() nounwind { +; X64-LABEL: ret_undef: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp 
i32_callee # TAILCALL +; +; X32-LABEL: ret_undef: +; X32: # %bb.0: +; X32-NEXT: jmp i32_callee # TAILCALL + %p = tail call tailcc i32 @i32_callee() + ret i32 undef +} + +declare tailcc void @does_not_return() + +define tailcc i32 @noret() nounwind { +; X64-LABEL: noret: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: popq %rax +; X64-NEXT: jmp does_not_return # TAILCALL +; +; X32-LABEL: noret: +; X32: # %bb.0: +; X32-NEXT: jmp does_not_return # TAILCALL + tail call tailcc void @does_not_return() + unreachable +} + +define tailcc void @void_test(i32, i32, i32, i32) { +; X64-LABEL: void_test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp void_test # TAILCALL +; +; X32-LABEL: void_test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp void_test # TAILCALL + entry: + tail call tailcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) + ret void +} + +define tailcc i1 @i1test(i32, i32, i32, i32) { +; X64-LABEL: i1test: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: jmp i1test # TAILCALL +; +; X32-LABEL: i1test: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; 
X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: addl $8, %esp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: popl %esi +; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: jmp i1test # TAILCALL + entry: + %4 = tail call tailcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) + ret i1 %4 +} diff --git a/llvm/test/CodeGen/X86/tailcc-calleesave.ll b/llvm/test/CodeGen/X86/tailcc-calleesave.ll new file mode 100644 index 0000000000000..09685fb17cbaf --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-calleesave.ll @@ -0,0 +1,19 @@ +; RUN: llc -mcpu=core < %s | FileCheck %s + +target triple = "i686-apple-darwin" + +declare tailcc void @foo(i32, i32, i32, i32, i32, i32) +declare i32* @bar(i32*) + +define tailcc void @hoge(i32 %b) nounwind { +; Do not overwrite pushed callee-save registers +; CHECK: pushl +; CHECK: subl $[[SIZE:[0-9]+]], %esp +; CHECK-NOT: [[SIZE]](%esp) + %a = alloca i32 + store i32 0, i32* %a + %d = tail call i32* @bar(i32* %a) nounwind + store i32 %b, i32* %d + tail call tailcc void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) nounwind + ret void +} diff --git a/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll b/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll new file mode 100644 index 0000000000000..3199b8c34b770 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-disable-tail-calls.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=NO-OPTION +; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls | FileCheck %s --check-prefix=DISABLE-TRUE +; RUN: llc < %s -mtriple=x86_64-- -disable-tail-calls=false | FileCheck %s --check-prefix=DISABLE-FALSE + +; Check that command line option "-disable-tail-calls" overrides function +; attribute "disable-tail-calls". 
+ +; NO-OPTION-LABEL: {{\_?}}func_attr +; NO-OPTION: callq {{\_?}}callee + +; DISABLE-FALSE-LABEL: {{\_?}}func_attr +; DISABLE-FALSE: jmp {{\_?}}callee + +; DISABLE-TRUE-LABEL: {{\_?}}func_attr +; DISABLE-TRUE: callq {{\_?}}callee + +define tailcc i32 @func_attr(i32 %a) #0 { +entry: + %call = tail call tailcc i32 @callee(i32 %a) + ret i32 %call +} + +; NO-OPTION-LABEL: {{\_?}}func_noattr +; NO-OPTION: jmp {{\_?}}callee + +; DISABLE-FALSE-LABEL: {{\_?}}func_noattr +; DISABLE-FALSE: jmp {{\_?}}callee + +; DISABLE-TRUE-LABEL: {{\_?}}func_noattr +; DISABLE-TRUE: callq {{\_?}}callee + +define tailcc i32 @func_noattr(i32 %a) { +entry: + %call = tail call tailcc i32 @callee(i32 %a) + ret i32 %call +} + +declare tailcc i32 @callee(i32) + +attributes #0 = { "disable-tail-calls"="true" } diff --git a/llvm/test/CodeGen/X86/tailcc-fastcc.ll b/llvm/test/CodeGen/X86/tailcc-fastcc.ll new file mode 100644 index 0000000000000..03369855de4ac --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-fastcc.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -tailcallopt < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc -tailcallopt < %s -mtriple=i686-unknown-unknown | FileCheck %s -check-prefix=X32 + +; llc -tailcallopt should not enable tail calls from fastcc to tailcc or vice versa + +declare tailcc i32 @tailcallee1(i32 %a1, i32 %a2, i32 %a3, i32 %a4) + +define fastcc i32 @tailcaller1(i32 %in1, i32 %in2) nounwind { +; X64-LABEL: tailcaller1: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: callq tailcallee1 +; X64-NEXT: retq $8 +; +; X32-LABEL: tailcaller1: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %edx +; X32-NEXT: pushl %ecx +; X32-NEXT: calll tailcallee1 +; X32-NEXT: retl +entry: + %tmp11 = tail call tailcc i32 @tailcallee1(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +declare fastcc i32 @tailcallee2(i32 
%a1, i32 %a2, i32 %a3, i32 %a4) + +define tailcc i32 @tailcaller2(i32 %in1, i32 %in2) nounwind { +; X64-LABEL: tailcaller2: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %edx +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: callq tailcallee2 +; X64-NEXT: retq $8 +; +; X32-LABEL: tailcaller2: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %edx +; X32-NEXT: pushl %ecx +; X32-NEXT: calll tailcallee2 +; X32-NEXT: retl +entry: + %tmp11 = tail call fastcc i32 @tailcallee2(i32 %in1, i32 %in2, i32 %in1, i32 %in2) + ret i32 %tmp11 +} diff --git a/llvm/test/CodeGen/X86/tailcc-fastisel.ll b/llvm/test/CodeGen/X86/tailcc-fastisel.ll new file mode 100644 index 0000000000000..e6d75faf4cde7 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-fastisel.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -fast-isel -fast-isel-abort=1 | FileCheck %s + +%0 = type { i64, i32, i8* } + +define tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind { +fail: ; preds = %entry + %tmp20 = tail call tailcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; [#uses=1] +; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL + ret i8* %tmp20 +} + +define i32 @foo() nounwind { +entry: + %0 = tail call i32 (...) @bar() nounwind ; [#uses=1] + ret i32 %0 +} + +declare i32 @bar(...) nounwind diff --git a/llvm/test/CodeGen/X86/tailcc-largecode.ll b/llvm/test/CodeGen/X86/tailcc-largecode.ll new file mode 100644 index 0000000000000..a3b5c30074556 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-largecode.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -code-model=large -enable-misched=false | FileCheck %s + +declare tailcc i32 @callee(i32 %arg) +define tailcc i32 @directcall(i32 %arg) { +entry: +; This is the large code model, so &callee may not fit into the jmp +; instruction. Instead, stick it into a register. 
+; CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]] +; CHECK: jmpq *[[REGISTER]] # TAILCALL + %res = tail call tailcc i32 @callee(i32 %arg) + ret i32 %res +} + +; Check that the register used for an indirect tail call doesn't +; clobber any of the arguments. +define tailcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) +; CHECK: pushq +; Put the call target into R11, which won't be clobbered while restoring +; callee-saved registers and won't be used for passing arguments. +; CHECK: movq %rdi, %rax +; Pass the stack argument. +; CHECK: movl $7, 16(%rsp) +; Pass the register arguments, in the right registers. +; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; Adjust the stack to "return". +; CHECK: popq +; And tail-call to the target. +; CHECK: jmpq *%rax # TAILCALL + %res = tail call tailcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7) + ret i32 %res +} + +; Check that the register used for a direct tail call doesn't clobber +; any of the arguments. +declare tailcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32) +define tailcc i32 @direct_manyargs() { +; Adjust the stack to enter the function. (The amount of the +; adjustment may change in the future, in which case the location of +; the stack argument and the return adjustment will change too.) +; CHECK: pushq +; Pass the stack argument. +; CHECK: movl $7, 16(%rsp) +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into a register which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %rax +; Pass the register arguments, in the right registers. 
+; CHECK: movl $1, %edi +; CHECK: movl $2, %esi +; CHECK: movl $3, %edx +; CHECK: movl $4, %ecx +; CHECK: movl $5, %r8d +; CHECK: movl $6, %r9d +; Adjust the stack to "return". +; CHECK: popq +; And tail-call to the target. +; CHECK: jmpq *%rax # TAILCALL + %res = tail call tailcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4, + i32 5, i32 6, i32 7) + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/tailcc-stackalign.ll b/llvm/test/CodeGen/X86/tailcc-stackalign.ll new file mode 100644 index 0000000000000..36333a9a213d8 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-stackalign.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux -no-x86-call-frame-opt | FileCheck %s +; Linux has 8 byte alignment so the params cause stack size 20, +; ensure that a normal tailcc call has matching stack size + + +define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + ret i32 %a3 +} + +define tailcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) { + %tmp11 = tail call tailcc i32 @tailcallee(i32 %in1, i32 %in2, + i32 %in1, i32 %in2) + ret i32 %tmp11 +} + +define i32 @main(i32 %argc, i8** %argv) { + %tmp1 = call tailcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 ) + ; expect match subl [stacksize] here + ret i32 0 +} + +; CHECK: calll tailcaller +; CHECK-NEXT: subl $12 diff --git a/llvm/test/CodeGen/X86/tailcc-structret.ll b/llvm/test/CodeGen/X86/tailcc-structret.ll new file mode 100644 index 0000000000000..2d83d4a3c9f5d --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-structret.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s +define tailcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) { +entry: + %2 = tail call tailcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1) + ret { { i8*, i8* }*, i8*} %2 +; CHECK: jmp init +} diff --git a/llvm/test/CodeGen/X86/tailccbyval.ll b/llvm/test/CodeGen/X86/tailccbyval.ll new file mode 100644 index 0000000000000..dbde868e51114 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/tailccbyval.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s +%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32 } + +define tailcc i32 @tailcallee(%struct.s* byval %a) nounwind { +entry: + %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0 + %tmp3 = load i32, i32* %tmp2 + ret i32 %tmp3 +; CHECK: tailcallee +; CHECK: movl 4(%esp), %eax +} + +define tailcc i32 @tailcaller(%struct.s* byval %a) nounwind { +entry: + %tmp4 = tail call tailcc i32 @tailcallee(%struct.s* byval %a ) + ret i32 %tmp4 +; CHECK: tailcaller +; CHECK: jmp tailcallee +} diff --git a/llvm/test/CodeGen/X86/tailccbyval64.ll b/llvm/test/CodeGen/X86/tailccbyval64.ll new file mode 100644 index 0000000000000..47d20ea972adb --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccbyval64.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s + +; FIXME: Win64 does not support byval. + +; Expect the entry point. +; CHECK-LABEL: tailcaller: + +; Expect 2 rep;movs because of tail call byval lowering. +; CHECK: rep; +; CHECK: rep; + +; A sequence of copyto/copyfrom virtual registers is used to deal with byval +; lowering appearing after moving arguments to registers. The following two +; checks verify that the register allocator changes those sequences to direct +; moves to argument register where it can (for registers that are not used in +; byval lowering - not rsi, not rdi, not rcx). +; Expect argument 4 to be moved directly to register edx. +; CHECK: movl $7, %edx + +; Expect argument 6 to be moved directly to register r8. +; CHECK: movl $17, %r8d + +; Expect not call but jmp to @tailcallee. +; CHECK: jmp tailcallee + +; Expect the trailer. 
+; CHECK: .size tailcaller + +%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64 } + +declare tailcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %val3, i64 %val4, i64 %val5) + + +define tailcc i64 @tailcaller(i64 %b, %struct.s* byval %a) { +entry: + %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1 + %tmp3 = load i64, i64* %tmp2, align 8 + %tmp4 = tail call tailcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17) + ret i64 %tmp4 +} diff --git a/llvm/test/CodeGen/X86/tailccfp.ll b/llvm/test/CodeGen/X86/tailccfp.ll new file mode 100644 index 0000000000000..32814e93f458e --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccfp.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=i686-- | FileCheck %s +define tailcc i32 @bar(i32 %X, i32(double, i32) *%FP) { + %Y = tail call tailcc i32 %FP(double 0.0, i32 %X) + ret i32 %Y +; CHECK: jmpl +} diff --git a/llvm/test/CodeGen/X86/tailccfp2.ll b/llvm/test/CodeGen/X86/tailccfp2.ll new file mode 100644 index 0000000000000..f8b29b386ad59 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccfp2.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=i686-- | FileCheck %s + +declare i32 @putchar(i32) + +define tailcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind { +; CHECK-LABEL: checktail: + %tmp1 = icmp sgt i32 %x, 0 + br i1 %tmp1, label %if-then, label %if-else + +if-then: + %fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)* + %arg1 = add i32 %x, -1 + call i32 @putchar(i32 90) +; CHECK: jmpl *%e{{.*}} + %res = tail call tailcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g) + ret i32 %res + +if-else: + ret i32 %x +} + + +define i32 @main() nounwind { + %f = bitcast i32 (i32, i32*, i32)* @checktail to i32* + %res = tail call tailcc i32 @checktail( i32 10, i32* %f,i32 10) + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/tailccpic1.ll b/llvm/test/CodeGen/X86/tailccpic1.ll new file mode 100644 index 
0000000000000..de8f2219bc2f3 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccpic1.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s + +; This test uses guaranteed TCO so these will be tail calls, despite the early +; binding issues. + +define protected tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define tailcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + ret i32 %tmp11 +; CHECK: jmp tailcallee +} diff --git a/llvm/test/CodeGen/X86/tailccpic2.ll b/llvm/test/CodeGen/X86/tailccpic2.ll new file mode 100644 index 0000000000000..314cd8f2fd67c --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccpic2.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s + +define tailcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define tailcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call tailcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + ret i32 %tmp11 +; CHECK: movl tailcallee@GOT +; CHECK: jmpl +} + diff --git a/llvm/test/CodeGen/X86/tailccstack64.ll b/llvm/test/CodeGen/X86/tailccstack64.ll new file mode 100644 index 0000000000000..bd0f4a739504f --- /dev/null +++ b/llvm/test/CodeGen/X86/tailccstack64.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s + +; FIXME: Redundant unused stack allocation could be eliminated. +; CHECK: subq ${{24|72|80}}, %rsp + +; Check that lowered arguments on the stack do not overwrite each other. +; Add %in1 %p1 to a different temporary register (%eax). +; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]] +; Move param %in1 to temp register (%r10d). 
+; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]] +; Add %in1 %p1 to a different temporary register (%eax). +; CHECK: addl {{%edi|%ecx}}, [[R1]] +; Move param %in2 to stack. +; CHECK-DAG: movl [[R2]], [[A1]](%rsp) +; Move result of addition to stack. +; CHECK-DAG: movl [[R1]], [[A2]](%rsp) +; Eventually, do a TAILCALL +; CHECK: TAILCALL + +declare tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) nounwind + +define tailcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) nounwind { +entry: + %tmp = add i32 %in1, %p1 + %retval = tail call tailcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp) + ret i32 %retval +} diff --git a/llvm/utils/vim/syntax/llvm.vim b/llvm/utils/vim/syntax/llvm.vim index 14987cb2348f3..487a37b4b86ba 100644 --- a/llvm/utils/vim/syntax/llvm.vim +++ b/llvm/utils/vim/syntax/llvm.vim @@ -82,6 +82,7 @@ syn keyword llvmKeyword \ externally_initialized \ extern_weak \ fastcc + \ tailcc \ filter \ from \ gc From 96ac97a4213287003f08636d0c372b3f71e9cfca Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Mon, 7 Oct 2019 22:36:19 +0000 Subject: [PATCH 177/254] Add VFS support for sanitizers' blacklist Differential Revision: https://reviews.llvm.org/D67742 llvm-svn: 373977 --- clang/lib/AST/ASTContext.cpp | 19 ++++++++++++++++++- .../sanitizer-blacklist-vfsoverlay.yaml | 15 +++++++++++++++ clang/test/CodeGen/ubsan-blacklist.c | 11 +++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a41b64ffcc81f..906c54194d942 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -72,6 +72,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include 
"llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -81,6 +82,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -826,6 +828,18 @@ static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI, llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything."); } +static std::vector +getRealPaths(llvm::vfs::FileSystem &VFS, llvm::ArrayRef Paths) { + std::vector Result; + llvm::SmallString<128> Buffer; + for (const auto &File : Paths) { + if (std::error_code EC = VFS.getRealPath(File, Buffer)) + llvm::report_fatal_error("can't open file '" + File + "': " + EC.message()); + Result.push_back(Buffer.str()); + } + return Result; +} + ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins) @@ -833,7 +847,10 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, TemplateSpecializationTypes(this_()), DependentTemplateSpecializationTypes(this_()), SubstTemplateTemplateParmPacks(this_()), SourceMgr(SM), LangOpts(LOpts), - SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)), + SanitizerBL(new SanitizerBlacklist( + getRealPaths(SM.getFileManager().getVirtualFileSystem(), + LangOpts.SanitizerBlacklistFiles), + SM)), XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles, LangOpts.XRayNeverInstrumentFiles, LangOpts.XRayAttrListFiles, SM)), diff --git a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml new file mode 100644 index 0000000000000..df2b221897693 --- /dev/null +++ b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml @@ -0,0 +1,15 @@ +{ + 'version': 0, + 'roots': [ + { 'name': '@DIR@', 'type': 'directory', + 'contents': [ + { 'name': 
'only-virtual-file.blacklist', 'type': 'file', + 'external-contents': '@REAL_FILE@' + }, + { 'name': 'invalid-virtual-file.blacklist', 'type': 'file', + 'external-contents': '@NONEXISTENT_FILE@' + } + ] + } + ] +} diff --git a/clang/test/CodeGen/ubsan-blacklist.c b/clang/test/CodeGen/ubsan-blacklist.c index 666003bd9233c..d6b4b71431480 100644 --- a/clang/test/CodeGen/ubsan-blacklist.c +++ b/clang/test/CodeGen/ubsan-blacklist.c @@ -5,6 +5,17 @@ // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-func.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FILE +// RUN: rm -f %t-vfsoverlay.yaml +// RUN: rm -f %t-nonexistent.blacklist +// RUN: sed -e "s|@DIR@|%T|g" %S/Inputs/sanitizer-blacklist-vfsoverlay.yaml | sed -e "s|@REAL_FILE@|%t-func.blacklist|g" | sed -e "s|@NONEXISTENT_FILE@|%t-nonexistent.blacklist|g" > %t-vfsoverlay.yaml +// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/only-virtual-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/invalid-virtual-file.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID-MAPPED-FILE +// INVALID-MAPPED-FILE: invalid-virtual-file.blacklist': No such file or directory + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%t-nonexistent.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID +// INVALID: nonexistent.blacklist': No such file or directory + unsigned i; // DEFAULT: @hash From 87dd9688493a0e215b4670cbd49c47192eeca7aa Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 7 Oct 2019 22:43:17 +0000 Subject: [PATCH 178/254] [tsan] Don't delay 
SIGTRAP handler Reviewers: eugenis, jfb Subscribers: #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D68604 llvm-svn: 373978 --- .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 9 +++--- .../TestCases/Linux/signal_trap_handler.cpp | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index d1d83e23d5585..8aea1e4ec0513 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -114,6 +114,7 @@ const int PTHREAD_MUTEX_RECURSIVE_NP = 2; const int EPOLL_CTL_ADD = 1; #endif const int SIGILL = 4; +const int SIGTRAP = 5; const int SIGABRT = 6; const int SIGFPE = 8; const int SIGSEGV = 11; @@ -1962,10 +1963,10 @@ void ProcessPendingSignals(ThreadState *thr) { } // namespace __tsan static bool is_sync_signal(ThreadSignalContext *sctx, int sig) { - return sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || - sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE || sig == SIGSYS || - // If we are sending signal to ourselves, we must process it now. - (sctx && sig == sctx->int_signal_send); + return sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP || + sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE || sig == SIGSYS || + // If we are sending signal to ourselves, we must process it now. 
+ (sctx && sig == sctx->int_signal_send); } void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig, diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp new file mode 100644 index 0000000000000..9b4bc067e4920 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp @@ -0,0 +1,29 @@ +// RUN: %clangxx -O1 %s -o %t && %env_tool_opts=handle_sigtrap=1 %run %t 2>&1 | FileCheck %s + +#include +#include +#include + +int handled; + +void handler(int signo, siginfo_t *info, void *uctx) { + handled = 1; +} + +int main() { + struct sigaction a = {}, old = {}; + a.sa_sigaction = handler; + a.sa_flags = SA_SIGINFO; + sigaction(SIGTRAP, &a, &old); + + a = {}; + sigaction(SIGTRAP, 0, &a); + assert(a.sa_sigaction == handler); + assert(a.sa_flags & SA_SIGINFO); + + __builtin_debugtrap(); + assert(handled); + fprintf(stderr, "HANDLED %d\n", handled); +} + +// CHECK: HANDLED 1 From 9917c76107f827ec2ac19cbd5a42939ddd3bd2be Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 7 Oct 2019 22:43:19 +0000 Subject: [PATCH 179/254] [sanitizer] Print SIGTRAP for corresponding signal Reviewers: eugenis, jfb Subscribers: #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D68603 llvm-svn: 373979 --- compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp | 2 ++ .../test/sanitizer_common/TestCases/Linux/signal_trap.cpp | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp index 002bcb1eda4ab..d890a3a317737 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp @@ -312,6 +312,8 @@ const char *SignalContext::Describe() const { return 
"SEGV"; case SIGBUS: return "BUS"; + case SIGTRAP: + return "TRAP"; } return "UNKNOWN SIGNAL"; } diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap.cpp new file mode 100644 index 0000000000000..4298d44b88ca4 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap.cpp @@ -0,0 +1,8 @@ +// RUN: %clangxx -O1 %s -o %t && %env_tool_opts=handle_sigtrap=2 not %run %t 2>&1 | FileCheck %s + +int main() { + __builtin_debugtrap(); +} + +// CHECK: Sanitizer:DEADLYSIGNAL +// CHECK: Sanitizer: TRAP on unknown address From 2b9f0b064b48cd14298be7ce99549da0cd5d5596 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Mon, 7 Oct 2019 22:55:42 +0000 Subject: [PATCH 180/254] Fix the spelling of my name. llvm-svn: 373980 --- llvm/docs/Proposals/GitHubMove.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/Proposals/GitHubMove.rst b/llvm/docs/Proposals/GitHubMove.rst index 6d4a2a1f135e0..ed46f5ae199f6 100644 --- a/llvm/docs/Proposals/GitHubMove.rst +++ b/llvm/docs/Proposals/GitHubMove.rst @@ -1081,6 +1081,6 @@ References .. [LattnerRevNum] Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2011-July/041739.html .. [TrickRevNum] Andrew Trick, http://lists.llvm.org/pipermail/llvm-dev/2011-July/041721.html -.. [JSonnRevNum] Joerg Sonnenberg, http://lists.llvm.org/pipermail/llvm-dev/2011-July/041688.html +.. [JSonnRevNum] Joerg Sonnenberger, http://lists.llvm.org/pipermail/llvm-dev/2011-July/041688.html .. [MatthewsRevNum] Chris Matthews, http://lists.llvm.org/pipermail/cfe-dev/2016-July/049886.html .. [statuschecks] GitHub status-checks, https://help.github.com/articles/about-required-status-checks/ From 7647d3ec7003ff5264d9d3aa1c2262a8f6853be5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Oct 2019 23:02:03 +0000 Subject: [PATCH 181/254] [X86] Add test cases for zero extending a gather index from less than i32 to i64. 
We should be able to use a smaller zero extend. llvm-svn: 373981 --- .../test/CodeGen/X86/masked_gather_scatter.ll | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 9bea63f3055ff..fe870e51538db 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -2689,6 +2689,108 @@ define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) { } declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>) +; Make sure we also allow index to be zero extended from a smaller than i32 element size. +define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) { +; KNL_64-LABEL: zext_i8_index: +; KNL_64: # %bb.0: +; KNL_64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_64-NEXT: kxnorw %k0, %k0, %k1 +; KNL_64-NEXT: kxnorw %k0, %k0, %k2 +; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2} +; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL_64-NEXT: retq +; +; KNL_32-LABEL: zext_i8_index: +; KNL_32: # %bb.0: +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_32-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_32-NEXT: kxnorw %k0, %k0, %k1 +; KNL_32-NEXT: kxnorw %k0, %k0, %k2 +; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2} +; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL_32-NEXT: retl +; +; SKX-LABEL: zext_i8_index: +; SKX: # %bb.0: +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 +; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: kxnorw %k0, %k0, %k1 +; SKX-NEXT: kxnorw %k0, %k0, %k2 +; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2} +; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; SKX-NEXT: 
retq +; +; SKX_32-LABEL: zext_i8_index: +; SKX_32: # %bb.0: +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX_32-NEXT: vextracti128 $1, %ymm0, %xmm0 +; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX_32-NEXT: kxnorw %k0, %k0, %k1 +; SKX_32-NEXT: kxnorw %k0, %k0, %k2 +; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2} +; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; SKX_32-NEXT: retl + + %zext_ind = zext <16 x i8> %ind to <16 x i64> + %gep.random = getelementptr float, float *%base, <16 x i64> %zext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +; Make sure we also allow index to be zero extended from a smaller than i32 element size. 
+define <8 x float> @zext_v8i8_index(float* %base, <8 x i8> %ind) { +; KNL_64-LABEL: zext_v8i8_index: +; KNL_64: # %bb.0: +; KNL_64-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; KNL_64-NEXT: kxnorw %k0, %k0, %k1 +; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; KNL_64-NEXT: retq +; +; KNL_32-LABEL: zext_v8i8_index: +; KNL_32: # %bb.0: +; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; KNL_32-NEXT: kxnorw %k0, %k0, %k1 +; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; KNL_32-NEXT: retl +; +; SKX-LABEL: zext_v8i8_index: +; SKX: # %bb.0: +; SKX-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: kxnorw %k0, %k0, %k1 +; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; SKX-NEXT: retq +; +; SKX_32-LABEL: zext_v8i8_index: +; SKX_32: # %bb.0: +; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; SKX_32-NEXT: kxnorw %k0, %k0, %k1 +; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; SKX_32-NEXT: retl + + %zext_ind = zext <8 x i8> %ind to <8 x i64> + %gep.random = getelementptr float, float *%base, <8 x i64> %zext_ind + + %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> , <8 x float> undef) + ret <8 x float>%res +} + ; Index requires promotion define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> %ind, <2 x i1> %mask) { ; KNL_64-LABEL: test_scatter_2i32_index: From be7f81ece9459ed9a72e4be645d86b6ce96484ab Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Oct 2019 23:03:12 +0000 Subject: [PATCH 182/254] [X86] Shrink zero extends of gather indices from type less than i32 to types larger than i32. Gather instructions can use i32 or i64 elements for indices. If the index is zero extended from a type smaller than i32 to i64, we can shrink the extend to just extend to i32. llvm-svn: 373982 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 70 +++++++------------ .../test/CodeGen/X86/masked_gather_scatter.ll | 64 ++++++----------- 2 files changed, 48 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 052300d6f7241..54ca3721cf59b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42572,16 +42572,17 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, SDValue Base = GorS->getBasePtr(); SDValue Scale = GorS->getScale(); - // Shrink constant indices if they are larger than 32-bits. 
- // Only do this before legalize types since v2i64 could become v2i32. - // FIXME: We could check that the type is legal if we're after legalize types, - // but then we would need to construct test cases where that happens. - // FIXME: We could support more than just constant vectors, but we need to - // careful with costing. A truncate that can be optimized out would be fine. - // Otherwise we might only want to create a truncate if it avoids a split. if (DCI.isBeforeLegalize()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + + // Shrink constant indices if they are larger than 32-bits. + // Only do this before legalize types since v2i64 could become v2i32. + // FIXME: We could check that the type is legal if we're after legalize + // types, but then we would need to construct test cases where that happens. + // FIXME: We could support more than just constant vectors, but we need to + // careful with costing. A truncate that can be optimized out would be fine. + // Otherwise we might only want to create a truncate if it avoids a split. if (auto *BV = dyn_cast(Index)) { - unsigned IndexWidth = Index.getScalarValueSizeInBits(); if (BV->isConstant() && IndexWidth > 32 && DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { unsigned NumElts = Index.getValueType().getVectorNumElements(); @@ -42604,16 +42605,18 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, Scatter->getIndexType()); } } - } - if (DCI.isBeforeLegalizeOps()) { - // Remove any sign extends from 32 or smaller to larger than 32. - // Only do this before LegalizeOps in case we need the sign extend for - // legalization. - if (Index.getOpcode() == ISD::SIGN_EXTEND && - Index.getScalarValueSizeInBits() > 32 && - Index.getOperand(0).getScalarValueSizeInBits() <= 32) { - Index = Index.getOperand(0); + // Shrink any sign/zero extends from 32 or smaller to larger than 32 if + // there are sufficient sign bits. 
Only do this before legalize types to + // avoid creating illegal types in truncate. + if ((Index.getOpcode() == ISD::SIGN_EXTEND || + Index.getOpcode() == ISD::ZERO_EXTEND) && + IndexWidth > 32 && + Index.getOperand(0).getScalarValueSizeInBits() <= 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); if (auto *Gather = dyn_cast(GorS)) { SDValue Ops[] = { Chain, Gather->getPassThru(), Mask, Base, Index, Scale } ; @@ -42630,11 +42633,14 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, Ops, Scatter->getMemOperand(), Scatter->getIndexType()); } + } + + if (DCI.isBeforeLegalizeOps()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); // Make sure the index is either i32 or i64 - unsigned ScalarSize = Index.getScalarValueSizeInBits(); - if (ScalarSize != 32 && ScalarSize != 64) { - MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; + if (IndexWidth != 32 && IndexWidth != 64) { + MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32; EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, Index.getValueType().getVectorNumElements()); Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); @@ -42654,30 +42660,6 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, Ops, Scatter->getMemOperand(), Scatter->getIndexType()); } - - // Try to remove zero extends from 32->64 if we know the sign bit of - // the input is zero. 
- if (Index.getOpcode() == ISD::ZERO_EXTEND && - Index.getScalarValueSizeInBits() == 64 && - Index.getOperand(0).getScalarValueSizeInBits() == 32 && - DAG.SignBitIsZero(Index.getOperand(0))) { - Index = Index.getOperand(0); - if (auto *Gather = dyn_cast(GorS)) { - SDValue Ops[] = { Chain, Gather->getPassThru(), - Mask, Base, Index, Scale } ; - return DAG.getMaskedGather(Gather->getVTList(), - Gather->getMemoryVT(), DL, Ops, - Gather->getMemOperand(), - Gather->getIndexType()); - } - auto *Scatter = cast(GorS); - SDValue Ops[] = { Chain, Scatter->getValue(), - Mask, Base, Index, Scale }; - return DAG.getMaskedScatter(Scatter->getVTList(), - Scatter->getMemoryVT(), DL, - Ops, Scatter->getMemOperand(), - Scatter->getIndexType()); - } } // With vector masks we only demand the upper bit of the mask. diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index fe870e51538db..2e4edb990942c 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -2693,56 +2693,32 @@ declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1 define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) { ; KNL_64-LABEL: zext_i8_index: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL_64-NEXT: vpmovzxwq {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: kxnorw %k0, %k0, %k2 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k2} -; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} -; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: zext_i8_index: ; KNL_32: # %bb.0: ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; KNL_32-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: kxnorw %k0, %k0, %k2 -; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2} -; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} -; KNL_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; KNL_32-NEXT: retl ; ; SKX-LABEL: zext_i8_index: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; SKX-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 -; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: kxnorw %k0, %k0, %k2 -; SKX-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 
{%k2} -; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} -; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: zext_i8_index: ; SKX_32: # %bb.0: ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; SKX_32-NEXT: vextracti128 $1, %ymm0, %xmm0 -; SKX_32-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: kxnorw %k0, %k0, %k2 -; SKX_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k2} -; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} -; SKX_32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; SKX_32-NEXT: retl %zext_ind = zext <16 x i8> %ind to <16 x i64> @@ -2756,32 +2732,36 @@ define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) { define <8 x float> @zext_v8i8_index(float* %base, <8 x i8> %ind) { ; KNL_64-LABEL: zext_v8i8_index: ; KNL_64: 
# %bb.0: -; KNL_64-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero -; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; KNL_64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_64-NEXT: movw $255, %ax +; KNL_64-NEXT: kmovw %eax, %k1 +; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} +; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: zext_v8i8_index: ; KNL_32: # %bb.0: ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero -; KNL_32-NEXT: kxnorw %k0, %k0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; KNL_32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL_32-NEXT: movw $255, %cx +; KNL_32-NEXT: kmovw %ecx, %k1 +; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} +; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; KNL_32-NEXT: retl ; ; SKX-LABEL: zext_v8i8_index: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbq {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} +; SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: zext_v8i8_index: ; SKX_32: # %bb.0: ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; SKX_32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 -; SKX_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} +; SKX_32-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1} ; SKX_32-NEXT: retl %zext_ind = zext <8 x i8> %ind to <8 x i64> From 2059105637867f7a02185998a30e7f8228dca280 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 7 Oct 2019 23:04:16 +0000 Subject: [PATCH 183/254] [tsan, go] break commands into multiple lines Summary: Patch by Keith Randall. 
Reviewers: dvyukov, vitalybuka Subscribers: delcypher, jfb, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D68596 llvm-svn: 373983 --- compiler-rt/lib/tsan/go/build.bat | 58 +++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat index ee8768b80b035..1383802036c6d 100644 --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -1,4 +1,56 @@ -type tsan_go.cpp ..\rtl\tsan_interface_atomic.cpp ..\rtl\tsan_clock.cpp ..\rtl\tsan_flags.cpp ..\rtl\tsan_md5.cpp ..\rtl\tsan_mutex.cpp ..\rtl\tsan_report.cpp ..\rtl\tsan_rtl.cpp ..\rtl\tsan_rtl_mutex.cpp ..\rtl\tsan_rtl_report.cpp ..\rtl\tsan_rtl_thread.cpp ..\rtl\tsan_rtl_proc.cpp ..\rtl\tsan_stat.cpp ..\rtl\tsan_suppressions.cpp ..\rtl\tsan_sync.cpp ..\rtl\tsan_stack_trace.cpp ..\..\sanitizer_common\sanitizer_allocator.cpp ..\..\sanitizer_common\sanitizer_common.cpp ..\..\sanitizer_common\sanitizer_flags.cpp ..\..\sanitizer_common\sanitizer_stacktrace.cpp ..\..\sanitizer_common\sanitizer_libc.cpp ..\..\sanitizer_common\sanitizer_printf.cpp ..\..\sanitizer_common\sanitizer_suppressions.cpp ..\..\sanitizer_common\sanitizer_thread_registry.cpp ..\rtl\tsan_platform_windows.cpp ..\..\sanitizer_common\sanitizer_win.cpp ..\..\sanitizer_common\sanitizer_deadlock_detector1.cpp ..\..\sanitizer_common\sanitizer_stackdepot.cpp ..\..\sanitizer_common\sanitizer_persistent_allocator.cpp ..\..\sanitizer_common\sanitizer_flag_parser.cpp ..\..\sanitizer_common\sanitizer_symbolizer.cpp ..\..\sanitizer_common\sanitizer_termination.cpp > gotsan.cpp - -gcc -c -o race_windows_amd64.syso gotsan.cpp -I..\rtl -I..\.. 
-I..\..\sanitizer_common -I..\..\..\include -m64 -Wall -fno-exceptions -fno-rtti -DSANITIZER_GO=1 -Wno-error=attributes -Wno-attributes -Wno-format -Wno-maybe-uninitialized -DSANITIZER_DEBUG=0 -O3 -fomit-frame-pointer -std=c++11 +type ^ + tsan_go.cpp ^ + ..\rtl\tsan_interface_atomic.cpp ^ + ..\rtl\tsan_clock.cpp ^ + ..\rtl\tsan_flags.cpp ^ + ..\rtl\tsan_md5.cpp ^ + ..\rtl\tsan_mutex.cpp ^ + ..\rtl\tsan_report.cpp ^ + ..\rtl\tsan_rtl.cpp ^ + ..\rtl\tsan_rtl_mutex.cpp ^ + ..\rtl\tsan_rtl_report.cpp ^ + ..\rtl\tsan_rtl_thread.cpp ^ + ..\rtl\tsan_rtl_proc.cpp ^ + ..\rtl\tsan_stat.cpp ^ + ..\rtl\tsan_suppressions.cpp ^ + ..\rtl\tsan_sync.cpp ^ + ..\rtl\tsan_stack_trace.cpp ^ + ..\..\sanitizer_common\sanitizer_allocator.cpp ^ + ..\..\sanitizer_common\sanitizer_common.cpp ^ + ..\..\sanitizer_common\sanitizer_flags.cpp ^ + ..\..\sanitizer_common\sanitizer_stacktrace.cpp ^ + ..\..\sanitizer_common\sanitizer_libc.cpp ^ + ..\..\sanitizer_common\sanitizer_printf.cpp ^ + ..\..\sanitizer_common\sanitizer_suppressions.cpp ^ + ..\..\sanitizer_common\sanitizer_thread_registry.cpp ^ + ..\rtl\tsan_platform_windows.cpp ^ + ..\..\sanitizer_common\sanitizer_win.cpp ^ + ..\..\sanitizer_common\sanitizer_deadlock_detector1.cpp ^ + ..\..\sanitizer_common\sanitizer_stackdepot.cpp ^ + ..\..\sanitizer_common\sanitizer_persistent_allocator.cpp ^ + ..\..\sanitizer_common\sanitizer_flag_parser.cpp ^ + ..\..\sanitizer_common\sanitizer_symbolizer.cpp ^ + ..\..\sanitizer_common\sanitizer_termination.cpp ^ + > gotsan.cpp +gcc ^ + -c ^ + -o race_windows_amd64.syso ^ + gotsan.cpp ^ + -I..\rtl ^ + -I..\.. 
^ + -I..\..\sanitizer_common ^ + -I..\..\..\include ^ + -m64 ^ + -Wall ^ + -fno-exceptions ^ + -fno-rtti ^ + -DSANITIZER_GO=1 ^ + -Wno-error=attributes ^ + -Wno-attributes ^ + -Wno-format ^ + -Wno-maybe-uninitialized ^ + -DSANITIZER_DEBUG=0 ^ + -O3 ^ + -fomit-frame-pointer ^ + -std=c++11 From 2fdec42a167c7325f771ba340c3a5eff3d33061a Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 7 Oct 2019 23:11:07 +0000 Subject: [PATCH 184/254] [tsan, go] fix Go windows build Summary: Don't use weak exports when building tsan into a shared library for Go. gcc can't handle the pragmas used to make the weak references. Include files that have been added since the last update to build.bat. (We should really find a better way to list all the files needed.) Add windows version defines (WINVER and _WIN32_WINNT) to get AcquireSRWLockExclusive and ReleaseSRWLockExclusive defined. Define GetProcessMemoryInfo to use the kernel32 version. This is kind of a hack, the windows header files should do this translation for us. I think we're not in the right family partition (we're using Desktop, but that translation only happens for App and System partitions???), but hacking the family partition seems equally gross and I have no idea what the consequences of that might be. Patch by Keith Randall. 
Reviewers: dvyukov, vitalybuka Reviewed By: vitalybuka Subscribers: jfb, delcypher, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D68599 llvm-svn: 373984 --- .../lib/sanitizer_common/sanitizer_win_defs.h | 12 ++++++++++++ compiler-rt/lib/tsan/go/build.bat | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h index bcd94a08dc441..bfe38a3323674 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win_defs.h @@ -43,6 +43,8 @@ #define STRINGIFY_(A) #A #define STRINGIFY(A) STRINGIFY_(A) +#if !SANITIZER_GO + // ----------------- A workaround for the absence of weak symbols -------------- // We don't have a direct equivalent of weak symbols when using MSVC, but we can // use the /alternatename directive to tell the linker to default a specific @@ -158,5 +160,15 @@ // return a >= b; // } // + +#else // SANITIZER_GO + +// Go neither needs nor wants weak references. +// The shenanigans above don't work for gcc. +# define WIN_WEAK_EXPORT_DEF(ReturnType, Name, ...) 
\ + extern "C" ReturnType Name(__VA_ARGS__) + +#endif // SANITIZER_GO + #endif // SANITIZER_WINDOWS #endif // SANITIZER_WIN_DEFS_H diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat index 1383802036c6d..bf502873b1132 100644 --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -31,6 +31,9 @@ type ^ ..\..\sanitizer_common\sanitizer_flag_parser.cpp ^ ..\..\sanitizer_common\sanitizer_symbolizer.cpp ^ ..\..\sanitizer_common\sanitizer_termination.cpp ^ + ..\..\sanitizer_common\sanitizer_file.cpp ^ + ..\..\sanitizer_common\sanitizer_symbolizer_report.cpp ^ + ..\rtl\tsan_external.cpp ^ > gotsan.cpp gcc ^ @@ -46,6 +49,9 @@ gcc ^ -fno-exceptions ^ -fno-rtti ^ -DSANITIZER_GO=1 ^ + -DWINVER=0x0600 ^ + -D_WIN32_WINNT=0x0600 ^ + -DGetProcessMemoryInfo=K32GetProcessMemoryInfo ^ -Wno-error=attributes ^ -Wno-attributes ^ -Wno-format ^ From 661db04b98c9905ec38a218bd421327b7b888c13 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 23:14:58 +0000 Subject: [PATCH 185/254] [Attributor] Use abstract call sites for call site callback Summary: When we iterate over uses of functions and expect them to be call sites, we now use abstract call sites to allow callback calls. 
Reviewers: sstefan1, uenoku Subscribers: hiraditya, bollu, hfinkel, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67871 llvm-svn: 373985 --- llvm/include/llvm/IR/CallSite.h | 9 +++ llvm/include/llvm/Transforms/IPO/Attributor.h | 12 +++- llvm/lib/Transforms/IPO/Attributor.cpp | 60 +++++++++++------- .../Transforms/FunctionAttrs/callbacks.ll | 63 +++++++++++++++++++ 4 files changed, 120 insertions(+), 24 deletions(-) create mode 100644 llvm/test/Transforms/FunctionAttrs/callbacks.ll diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h index b47a96c5d5faa..13b1ae8d0e326 100644 --- a/llvm/include/llvm/IR/CallSite.h +++ b/llvm/include/llvm/IR/CallSite.h @@ -854,6 +854,15 @@ class AbstractCallSite { return CI.ParameterEncoding[0]; } + /// Return the use of the callee value in the underlying instruction. Only + /// valid for callback calls! + const Use &getCalleeUseForCallback() const { + int CalleeArgIdx = getCallArgOperandNoForCallee(); + assert(CalleeArgIdx >= 0 && + unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()); + return getInstruction()->getOperandUse(CalleeArgIdx); + } + /// Return the pointer to function that is being called. Value *getCalledValue() const { if (isDirectCall()) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index f1bba68414525..1811c233ebd9a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -216,6 +216,16 @@ struct IRPosition { ArgNo); } + /// Create a position describing the argument of \p ACS at position \p ArgNo. + static const IRPosition callsite_argument(AbstractCallSite ACS, + unsigned ArgNo) { + int CSArgNo = ACS.getCallArgOperandNo(ArgNo); + if (CSArgNo >= 0) + return IRPosition::callsite_argument( + cast(*ACS.getInstruction()), CSArgNo); + return IRPosition(); + } + /// Create a position with function scope matching the "context" of \p IRP. 
/// If \p IRP is a call site (see isAnyCallSitePosition()) then the result /// will be a call site position, otherwise the function position of the @@ -825,7 +835,7 @@ struct Attributor { /// This method will evaluate \p Pred on call sites and return /// true if \p Pred holds in every call sites. However, this is only possible /// all call sites are known, hence the function has internal linkage. - bool checkForAllCallSites(const function_ref &Pred, + bool checkForAllCallSites(const function_ref &Pred, const AbstractAttribute &QueryingAA, bool RequireAllCallSites); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 6b9888bcdae51..6c389f33f732c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -596,11 +596,16 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, // The argument number which is also the call site argument number. unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); - auto CallSiteCheck = [&](CallSite CS) { - const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); - const AAType &AA = A.getAAFor(QueryingAA, CSArgPos); - LLVM_DEBUG(dbgs() << "[Attributor] CS: " << *CS.getInstruction() - << " AA: " << AA.getAsStr() << " @" << CSArgPos << "\n"); + auto CallSiteCheck = [&](AbstractCallSite ACS) { + const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is on not associated + // (which can happen for callback calls). 
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + const AAType &AA = A.getAAFor(QueryingAA, ACSArgPos); + LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() + << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); const StateType &AAS = static_cast(AA.getState()); if (T.hasValue()) *T &= AAS; @@ -3100,9 +3105,12 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { ChangeStatus updateImpl(Attributor &A) override { bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); - auto PredForCallSite = [&](CallSite CS) { - return checkAndUpdate(A, *this, *CS.getArgOperand(getArgNo()), - SimplifiedAssociatedValue); + auto PredForCallSite = [&](AbstractCallSite ACS) { + // Check if we have an associated argument or not (which can happen for + // callback calls). + if (Value *ArgOp = ACS.getCallArgOperand(getArgNo())) + return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue); + return false; }; if (!A.checkForAllCallSites(PredForCallSite, *this, true)) @@ -3914,9 +3922,9 @@ bool Attributor::isAssumedDead(const AbstractAttribute &AA, return true; } -bool Attributor::checkForAllCallSites(const function_ref &Pred, - const AbstractAttribute &QueryingAA, - bool RequireAllCallSites) { +bool Attributor::checkForAllCallSites( + const function_ref &Pred, + const AbstractAttribute &QueryingAA, bool RequireAllCallSites) { // We can try to determine information from // the call sites. However, this is only possible all call sites are known, // hence the function has internal linkage. @@ -3934,15 +3942,21 @@ bool Attributor::checkForAllCallSites(const function_ref &Pred, } for (const Use &U : AssociatedFunction->uses()) { - Instruction *I = dyn_cast(U.getUser()); - // TODO: Deal with abstract call sites here. 
- if (!I) + AbstractCallSite ACS(&U); + if (!ACS) { + LLVM_DEBUG(dbgs() << "[Attributor] Function " + << AssociatedFunction->getName() + << " has non call site use " << *U.get() << " in " + << *U.getUser() << "\n"); return false; + } + Instruction *I = ACS.getInstruction(); Function *Caller = I->getFunction(); - const auto &LivenessAA = getAAFor( - QueryingAA, IRPosition::function(*Caller), /* TrackDependence */ false); + const auto &LivenessAA = + getAAFor(QueryingAA, IRPosition::function(*Caller), + /* TrackDependence */ false); // Skip dead calls. if (LivenessAA.isAssumedDead(I)) { @@ -3952,22 +3966,22 @@ bool Attributor::checkForAllCallSites(const function_ref &Pred, continue; } - CallSite CS(U.getUser()); - if (!CS || !CS.isCallee(&U)) { + const Use *EffectiveUse = + ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U; + if (!ACS.isCallee(EffectiveUse)) { if (!RequireAllCallSites) continue; - - LLVM_DEBUG(dbgs() << "[Attributor] User " << *U.getUser() + LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser() << " is an invalid use of " << AssociatedFunction->getName() << "\n"); return false; } - if (Pred(CS)) + if (Pred(ACS)) continue; LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for " - << *CS.getInstruction() << "\n"); + << *ACS.getInstruction() << "\n"); return false; } @@ -4319,7 +4333,7 @@ ChangeStatus Attributor::run(Module &M) { const auto *LivenessAA = lookupAAFor(IRPosition::function(*F)); if (LivenessAA && - !checkForAllCallSites([](CallSite CS) { return false; }, + !checkForAllCallSites([](AbstractCallSite ACS) { return false; }, *LivenessAA, true)) continue; diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll new file mode 100644 index 0000000000000..3628bfa9daf2e --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=attributor 
-aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s +; ModuleID = 'callback_simple.c' +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Test 0 +; +; Make sure we propagate information from the caller to the callback callee but +; only for arguments that are mapped through the callback metadata. Here, the +; first two arguments of the call and the callback callee do not correspond to +; each other but argument 3-5 of the transitive call site in the caller match +; arguments 2-4 of the callback callee. Here we should see information and value +; transfer in both directions. +; FIXME: The callee -> call site direction is not working yet. + +define void @t0_caller(i32* %a) { +; CHECK: @t0_caller(i32* [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK: @t0_callback_callee(i32* nocapture writeonly [[IS_NOT_NULL:%.*]], i32* nocapture readonly [[PTR:%.*]], i32* [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C:%.*]], align 64 +; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A:%.*]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t0_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t0_check(i32* align 256, i64, i32*) + +declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...) + +!0 = !{!1} +!1 = !{i64 2, i64 -1, i64 -1, i1 true} From 766f2cc1a4bbe5008efc75d0439e8cee2813e76c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 23:21:52 +0000 Subject: [PATCH 186/254] [Attributor] Use local linkage instead of internal Local linkage is internal or private, and private is a specialization of internal, so either is fine for all our "local linkage" queries. 
llvm-svn: 373986 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 4 ++-- llvm/lib/Transforms/IPO/Attributor.cpp | 8 ++++---- llvm/test/Transforms/FunctionAttrs/internal-noalias.ll | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 1811c233ebd9a..be41f82d7d0f9 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -810,8 +810,8 @@ struct Attributor { /// This will trigger the identification and initialization of attributes for /// \p F. void markLiveInternalFunction(const Function &F) { - assert(F.hasInternalLinkage() && - "Only internal linkage is assumed dead initially."); + assert(F.hasLocalLinkage() && + "Only local linkage is assumed dead initially."); identifyDefaultAbstractAttributes(const_cast(F)); } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 6c389f33f732c..0bd454d88658d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -2081,7 +2081,7 @@ struct AAIsDeadImpl : public AAIsDead { for (const Instruction &I : BB) if (ImmutableCallSite ICS = ImmutableCallSite(&I)) if (const Function *F = ICS.getCalledFunction()) - if (F->hasInternalLinkage()) + if (F->hasLocalLinkage()) A.markLiveInternalFunction(*F); } @@ -3933,7 +3933,7 @@ bool Attributor::checkForAllCallSites( if (!AssociatedFunction) return false; - if (RequireAllCallSites && !AssociatedFunction->hasInternalLinkage()) { + if (RequireAllCallSites && !AssociatedFunction->hasLocalLinkage()) { LLVM_DEBUG( dbgs() << "[Attributor] Function " << AssociatedFunction->getName() @@ -4319,7 +4319,7 @@ ChangeStatus Attributor::run(Module &M) { // below fixpoint loop will identify and eliminate them. 
SmallVector InternalFns; for (Function &F : M) - if (F.hasInternalLinkage()) + if (F.hasLocalLinkage()) InternalFns.push_back(&F); bool FoundDeadFn = true; @@ -4634,7 +4634,7 @@ static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { // We look at internal functions only on-demand but if any use is not a // direct call, we have to do it eagerly. - if (F.hasInternalLinkage()) { + if (F.hasLocalLinkage()) { if (llvm::all_of(F.uses(), [](const Use &U) { return ImmutableCallSite(U.getUser()) && ImmutableCallSite(U.getUser()).isCallee(&U); diff --git a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll b/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll index cc207031015fc..9eec60b5e1e69 100644 --- a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll +++ b/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll @@ -8,9 +8,9 @@ entry: ret i32 %add } -; CHECK: define internal i32 @noalias_args(i32* nocapture readonly %A, i32* noalias nocapture readonly %B) +; CHECK: define private i32 @noalias_args(i32* nocapture readonly %A, i32* noalias nocapture readonly %B) -define internal i32 @noalias_args(i32* %A, i32* %B) #0 { +define private i32 @noalias_args(i32* %A, i32* %B) #0 { entry: %0 = load i32, i32* %A, align 4 %1 = load i32, i32* %B, align 4 From d4bea8830c919ea74eb2a618a0dd6a067654fb97 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 23:28:54 +0000 Subject: [PATCH 187/254] [Attributor][FIX] Remove initialize calls and add undefs The initialization logic has become part of the Attributor but the patches that introduced these calls here were in development when the transition happened. We also now clean up (undefine) the macros used to create attributes. 
llvm-svn: 373987 --- llvm/lib/Transforms/IPO/Attributor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 0bd454d88658d..f314f69755ff0 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -4778,7 +4778,6 @@ const char AAMemoryBehavior::ID = 0; SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ } \ - AA->initialize(A); \ return *AA; \ } @@ -4795,7 +4794,6 @@ const char AAMemoryBehavior::ID = 0; SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ } \ - AA->initialize(A); \ return *AA; \ } @@ -4820,7 +4818,9 @@ CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) +#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION #undef SWITCH_PK_CREATE From 748538e166ef64e8c9bddc7736cc9d44a5574092 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 7 Oct 2019 23:30:04 +0000 Subject: [PATCH 188/254] [Attributor][NFC] Add debug output llvm-svn: 373988 --- llvm/lib/Transforms/IPO/Attributor.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index f314f69755ff0..629be921fb481 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3930,8 +3930,11 @@ bool Attributor::checkForAllCallSites( // hence the function has internal linkage. 
const IRPosition &IRP = QueryingAA.getIRPosition(); const Function *AssociatedFunction = IRP.getAssociatedFunction(); - if (!AssociatedFunction) + if (!AssociatedFunction) { + LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP + << "\n"); return false; + } if (RequireAllCallSites && !AssociatedFunction->hasLocalLinkage()) { LLVM_DEBUG( From c8a6df71305f1c49f27371c1a4310685854c46c1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Oct 2019 23:33:08 +0000 Subject: [PATCH 189/254] AMDGPU/GlobalISel: Clamp G_SITOFP/G_UITOFP sources llvm-svn: 373989 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 +- .../AMDGPU/GlobalISel/legalize-sitofp.mir | 396 ++++++++++++++---- .../AMDGPU/GlobalISel/legalize-uitofp.mir | 325 +++++++++++--- 3 files changed, 581 insertions(+), 149 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index dfb8ed55d6b57..c1c111a762d47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -424,11 +424,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); // TODO: Split s1->s64 during regbankselect for VALU. 
- getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) + auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}}) .lowerFor({{S32, S64}}) - .customFor({{S64, S64}}) - .scalarize(0); + .customFor({{S64, S64}}); + if (ST.has16BitInsts()) + IToFP.legalFor({{S16, S16}}); + IToFP.clampScalar(1, S32, S64) + .scalarize(0); auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir index 5b9aa93cf9e3a..d329e7c405b64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s --- name: test_sitofp_s32_to_s32 @@ -7,10 +8,14 @@ body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_sitofp_s32_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX6-LABEL: name: test_sitofp_s32_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) + ; GFX6: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX8-LABEL: name: test_sitofp_s32_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) + ; GFX8: $vgpr0 = COPY [[SITOFP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_SITOFP %0 $vgpr0 = COPY %1 @@ -22,10 +27,14 @@ body: | 
bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_sitofp_s32_to_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX6-LABEL: name: test_sitofp_s32_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX8-LABEL: name: test_sitofp_s32_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_SITOFP %0 $vgpr0_vgpr1 = COPY %1 @@ -37,13 +46,20 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) - ; CHECK: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-LABEL: name: test_sitofp_v2s32_to_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) + ; GFX6: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: test_sitofp_v2s32_to_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32) + ; GFX8: 
[[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_SITOFP %0 $vgpr0_vgpr1 = COPY %1 @@ -55,13 +71,20 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) - ; CHECK: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-LABEL: name: test_sitofp_v2s32_to_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) + ; GFX6: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-LABEL: name: test_sitofp_v2s32_to_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32) + ; GFX8: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_SITOFP %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -73,50 +96,94 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; 
CHECK-LABEL: name: test_sitofp_s64_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] - ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] - ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C6]](s64) - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC1]](s32) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC2]] - 
; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] - ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] - ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64) - ; CHECK: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] - ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]] - ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s64) - ; CHECK: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX6-LABEL: name: test_sitofp_s64_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], 
[[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C6]](s64) + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC1]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC2]] + ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64) + ; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]] + ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s64) + ; GFX6: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX8-LABEL: name: test_sitofp_s64_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX8: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[TRUNC]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX8: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C6]](s64) + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC1]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC2]] + ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[AND1]](s64), [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64) + ; GFX8: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]] + ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s64) + ; GFX8: $vgpr0 = COPY [[SITOFP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_SITOFP %0 $vgpr0 = COPY %1 @@ -128,33 +195,196 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_sitofp_s64_to_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) - ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) - ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] - ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX6-LABEL: name: test_sitofp_s64_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX6: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) + ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] + ; GFX6: 
$vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX8-LABEL: name: test_sitofp_s64_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX8: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) + ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] + ; GFX8: $vgpr0_vgpr1 = COPY [[FADD]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_SITOFP %0 $vgpr0_vgpr1 = COPY %1 ... +--- +name: test_sitofp_s16_to_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_sitofp_s16_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_sitofp_s16_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[TRUNC]](s16) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_SITOFP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
+ --- name: test_sitofp_s16_to_s32 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_sitofp_s16_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[COPY]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-LABEL: name: test_sitofp_s16_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; GFX6: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX8-LABEL: name: test_sitofp_s16_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; GFX8: $vgpr0 = COPY [[SITOFP]](s32) %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_SITOFP %0 - %2:_(s32) = G_ANYEXT %1 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_SITOFP %1 $vgpr0 = COPY %2 ... 
+ +--- +name: test_sitofp_s16_to_s64 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_sitofp_s16_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX8-LABEL: name: test_sitofp_s16_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s64) = G_SITOFP %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: test_sitofp_s8_to_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_sitofp_s8_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_sitofp_s8_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s16) = G_SITOFP [[ASHR]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SITOFP]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s16) = G_SITOFP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_sitofp_s8_to_s32 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_sitofp_s8_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; GFX6: $vgpr0 = COPY [[SITOFP]](s32) + ; GFX8-LABEL: name: test_sitofp_s8_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) + ; GFX8: $vgpr0 = COPY [[SITOFP]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s32) = G_SITOFP %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: test_sitofp_s8_to_s64 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_sitofp_s8_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX6: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + ; GFX8-LABEL: name: test_sitofp_s8_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; GFX8: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[SITOFP]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s64) = G_SITOFP %1 + $vgpr0_vgpr1 = COPY %2 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir index 6fa7d4418a11e..4af74abb5dfe7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s --- name: test_uitofp_s32_to_s32 @@ -7,10 +8,14 @@ body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_uitofp_s32_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX6-LABEL: name: test_uitofp_s32_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) + ; GFX6: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX8-LABEL: name: test_uitofp_s32_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) + ; GFX8: $vgpr0 = COPY [[UITOFP]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -22,10 +27,14 @@ body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_uitofp_s32_to_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX6-LABEL: name: test_uitofp_s32_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX8-LABEL: name: test_uitofp_s32_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 + ; GFX8: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) %0:_(s32) = COPY $vgpr0 %1:_(s64) = G_UITOFP %0 $vgpr0_vgpr1 = COPY %1 @@ -37,13 +46,20 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_uitofp_v2s32_to_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) - ; CHECK: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-LABEL: name: test_uitofp_v2s32_to_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-LABEL: name: test_uitofp_v2s32_to_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UITOFP]](s32), [[UITOFP1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_UITOFP %0 $vgpr0_vgpr1 = COPY %1 @@ -55,37 +71,68 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_uitofp_s64_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] - ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C5]](s64) - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC1]] - ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] - ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] - ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] - ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64) - ; CHECK: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX6-LABEL: name: test_uitofp_s64_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C1:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX6: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C5]](s64) + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC1]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64) + ; GFX6: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX8-LABEL: name: test_uitofp_s64_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: 
[[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C5]](s64) + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[TRUNC]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC1]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64) + ; GFX8: $vgpr0 = COPY [[UITOFP]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_UITOFP %0 $vgpr0 = COPY %1 @@ -97,33 +144,185 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_uitofp_s64_to_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) - ; CHECK: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) - ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX6-LABEL: name: test_uitofp_s64_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX6: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) + ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; GFX8-LABEL: name: test_uitofp_s64_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX8: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) + ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[FADD]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_UITOFP %0 $vgpr0_vgpr1 = COPY %1 ... 
+--- +name: test_uitofp_s16_to_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_uitofp_s16_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_uitofp_s16_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[TRUNC]](s16) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_UITOFP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + --- name: test_uitofp_s16_to_s32 body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_uitofp_s16_to_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[COPY]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-LABEL: name: test_uitofp_s16_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; GFX6: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX8-LABEL: name: test_uitofp_s16_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; GFX8: $vgpr0 = COPY [[UITOFP]](s32) %0:_(s32) 
= COPY $vgpr0 - %1:_(s16) = G_UITOFP %0 - %2:_(s32) = G_ANYEXT %1 + %1:_(s16) = G_TRUNC %0 + %2:_(s32) = G_UITOFP %1 $vgpr0 = COPY %2 ... + +--- +name: test_uitofp_s16_to_s64 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_uitofp_s16_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX8-LABEL: name: test_uitofp_s16_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s64) = G_UITOFP %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: test_uitofp_s8_to_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_uitofp_s8_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_uitofp_s8_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s16) = G_UITOFP [[AND]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UITOFP]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s16) = G_UITOFP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_uitofp_s8_to_s32 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_uitofp_s8_to_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; GFX6: $vgpr0 = COPY [[UITOFP]](s32) + ; GFX8-LABEL: name: test_uitofp_s8_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) + ; GFX8: $vgpr0 = COPY [[UITOFP]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s32) = G_UITOFP %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: test_uitofp_s8_to_s64 +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: test_uitofp_s8_to_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + ; GFX8-LABEL: name: test_uitofp_s8_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[UITOFP]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s64) = G_UITOFP %1 + $vgpr0_vgpr1 = COPY %2 +... From f4c7345b88f8ca56ee350a4a0dbfee7e2db79839 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 7 Oct 2019 23:43:33 +0000 Subject: [PATCH 190/254] [Symbol] Remove unused method ClangASTContext::GetObjCClassName llvm-svn: 373990 --- lldb/include/lldb/Symbol/ClangASTContext.h | 3 --- lldb/source/Symbol/ClangASTContext.cpp | 19 ------------------- 2 files changed, 22 deletions(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index 188635a13b86f..92d8985f44602 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -603,9 +603,6 @@ class ClangASTContext : public TypeSystem { static bool GetCXXClassName(const CompilerType &type, std::string &class_name); - static bool GetObjCClassName(const CompilerType &type, - std::string &class_name); - // Type Completion bool GetCompleteType(lldb::opaque_compiler_type_t type) override; diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index ffb8d4b39c960..c0d48bd7783bf 100644 --- 
a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -3916,25 +3916,6 @@ bool ClangASTContext::IsObjCObjectPointerType(const CompilerType &type, return false; } -bool ClangASTContext::GetObjCClassName(const CompilerType &type, - std::string &class_name) { - if (!type) - return false; - - clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type)); - - const clang::ObjCObjectType *object_type = - llvm::dyn_cast(qual_type); - if (object_type) { - const clang::ObjCInterfaceDecl *interface = object_type->getInterface(); - if (interface) { - class_name = interface->getNameAsString(); - return true; - } - } - return false; -} - // Type Completion bool ClangASTContext::GetCompleteType(lldb::opaque_compiler_type_t type) { From 61f471a705a5df3d581ba4905337f433bac3ba1f Mon Sep 17 00:00:00 2001 From: Haibo Huang Date: Mon, 7 Oct 2019 23:49:01 +0000 Subject: [PATCH 191/254] [lldb] Unifying lldb python path Based on mgorny@'s D67890 There are 3 places where python site-package path is calculated independently: 1. finishSwigPythonLLDB.py where files are written to site-packages. 2. lldb/scripts/CMakeLists.txt where site-packages are installed. 3. ScriptInterpreterPython.cpp where site-packages are added to PYTHONPATH. This change creates the path once and use it everywhere. So that they will not go out of sync. Also it provides a chance for cross compiling users to specify the right path for site-packages. 
Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D68442 llvm-svn: 373991 --- lldb/CMakeLists.txt | 31 +++++++ lldb/scripts/CMakeLists.txt | 25 ----- lldb/scripts/Python/finishSwigPythonLLDB.py | 93 +------------------ lldb/scripts/finishSwigWrapperClasses.py | 19 ++-- lldb/scripts/get_relative_lib_dir.py | 44 --------- .../ScriptInterpreter/Python/CMakeLists.txt | 14 +-- .../Python/ScriptInterpreterPython.cpp | 34 ++----- .../Python/ScriptInterpreterPython.h | 3 +- 8 files changed, 49 insertions(+), 214 deletions(-) delete mode 100644 lldb/scripts/get_relative_lib_dir.py diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 0e54188720689..90732cf9547ab 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -37,6 +37,16 @@ if (WIN32) endif() if (NOT LLDB_DISABLE_PYTHON) + execute_process( + COMMAND ${PYTHON_EXECUTABLE} + -c "import distutils.sysconfig; print(distutils.sysconfig.get_python_lib(False, False, ''))" + OUTPUT_VARIABLE LLDB_PYTHON_DEFAULT_RELATIVE_PATH + OUTPUT_STRIP_TRAILING_WHITESPACE) + + file(TO_CMAKE_PATH ${LLDB_PYTHON_DEFAULT_RELATIVE_PATH} LLDB_PYTHON_DEFAULT_RELATIVE_PATH) + set(LLDB_PYTHON_RELATIVE_PATH ${LLDB_PYTHON_DEFAULT_RELATIVE_PATH} + CACHE STRING "Path where Python modules are installed, relative to install prefix") + add_subdirectory(scripts) endif () @@ -195,6 +205,12 @@ if (NOT LLDB_DISABLE_PYTHON) get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) get_target_property(liblldb_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY) + if(LLDB_BUILD_FRAMEWORK) + set(lldb_python_build_path "${liblldb_build_dir}/LLDB.framework/Resources/Python/lldb") + else() + set(lldb_python_build_path "${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/lldb") + endif() + # Add a Post-Build Event to copy over Python files and create the symlink # to liblldb.so for the Python API(hardlink on Windows). 
add_custom_target(finish_swig ALL @@ -206,6 +222,7 @@ if (NOT LLDB_DISABLE_PYTHON) --prefix=${CMAKE_BINARY_DIR} --cmakeBuildConfiguration=${CMAKE_CFG_INTDIR} --lldbLibDir=lib${LLVM_LIBDIR_SUFFIX} + --lldbPythonPath=${lldb_python_build_path} ${use_python_wrapper_from_src_dir} ${use_six_py_from_system} VERBATIM @@ -219,6 +236,20 @@ if (NOT LLDB_DISABLE_PYTHON) # Ensure we do the python post-build step when building lldb. add_dependencies(lldb finish_swig) + if(NOT LLDB_BUILD_FRAMEWORK) + # Install the LLDB python module + add_custom_target(lldb-python-scripts) + add_dependencies(lldb-python-scripts finish_swig) + install(DIRECTORY ${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/ + DESTINATION ${LLDB_PYTHON_RELATIVE_PATH} + COMPONENT lldb-scripts) + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-lldb-python-scripts + COMPONENT lldb-python-scripts + DEPENDS lldb-python-scripts) + endif() + endif() + # Add a Post-Build Event to copy the custom Python DLL to the lldb binaries dir so that Windows can find it when launching # lldb.exe or any other executables that were linked with liblldb. 
if (WIN32 AND NOT "${PYTHON_DLL}" STREQUAL "") diff --git a/lldb/scripts/CMakeLists.txt b/lldb/scripts/CMakeLists.txt index 40ff2b285f7b7..5b86956f2a9e8 100644 --- a/lldb/scripts/CMakeLists.txt +++ b/lldb/scripts/CMakeLists.txt @@ -55,28 +55,3 @@ add_custom_target(swig_wrapper ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lldb.py ) -if(NOT LLDB_BUILD_FRAMEWORK) - execute_process( - COMMAND ${PYTHON_EXECUTABLE} - -c "import distutils.sysconfig, sys; print(distutils.sysconfig.get_python_lib(True, False, sys.argv[1]))" - ${CMAKE_BINARY_DIR} - OUTPUT_VARIABLE SWIG_PYTHON_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE) - execute_process( - COMMAND ${PYTHON_EXECUTABLE} - -c "import distutils.sysconfig; print(distutils.sysconfig.get_python_lib(True, False, ''))" - OUTPUT_VARIABLE SWIG_INSTALL_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE) - - # Install the LLDB python module - add_custom_target(lldb-python-scripts) - add_dependencies(lldb-python-scripts finish_swig) - install(DIRECTORY ${SWIG_PYTHON_DIR}/ - DESTINATION ${SWIG_INSTALL_DIR} - COMPONENT lldb-scripts) - if (NOT LLVM_ENABLE_IDE) - add_llvm_install_targets(install-lldb-python-scripts - COMPONENT lldb-python-scripts - DEPENDS lldb-python-scripts) - endif() -endif() diff --git a/lldb/scripts/Python/finishSwigPythonLLDB.py b/lldb/scripts/Python/finishSwigPythonLLDB.py index d4d53a85248e1..902ad8af5dd33 100644 --- a/lldb/scripts/Python/finishSwigPythonLLDB.py +++ b/lldb/scripts/Python/finishSwigPythonLLDB.py @@ -597,86 +597,6 @@ def get_config_build_dir(vDictArgs, vstrFrameworkPythonDir): return (bOk, strConfigBldDir, strErrMsg) -#++--------------------------------------------------------------------------- -# Details: Determine where to put the files. Retrieve the directory path for -# Python's dist_packages/ site_package folder on a Windows platform. -# Args: vDictArgs - (R) Program input parameters. -# Returns: Bool - True = function success, False = failure. -# Str - Python Framework directory path. 
-# strErrMsg - Error description on task failure. -# Throws: None. -#-- - - -def get_framework_python_dir_windows(vDictArgs): - dbg = utilsDebug.CDebugFnVerbose( - "Python script get_framework_python_dir_windows()") - bOk = True - strWkDir = "" - strErrMsg = "" - - # We are being built by LLVM, so use the PYTHON_INSTALL_DIR argument, - # and append the python version directory to the end of it. Depending - # on the system other stuff may need to be put here as well. - from distutils.sysconfig import get_python_lib - strPythonInstallDir = "" - bHaveArgPrefix = "--prefix" in vDictArgs - if bHaveArgPrefix: - strPythonInstallDir = os.path.normpath(vDictArgs["--prefix"]) - - bHaveArgCmakeBuildConfiguration = "--cmakeBuildConfiguration" in vDictArgs - if bHaveArgCmakeBuildConfiguration: - strPythonInstallDir = os.path.join( - strPythonInstallDir, - vDictArgs["--cmakeBuildConfiguration"]) - - if strPythonInstallDir.__len__() != 0: - strWkDir = get_python_lib(True, False, strPythonInstallDir) - else: - strWkDir = get_python_lib(True, False) - strWkDir = os.path.normcase(os.path.join(strWkDir, "lldb")) - - return (bOk, strWkDir, strErrMsg) - -#++--------------------------------------------------------------------------- -# Details: Retrieve the directory path for Python's dist_packages/ -# site_package folder on a UNIX style platform. -# Args: vDictArgs - (R) Program input parameters. -# Returns: Bool - True = function success, False = failure. -# Str - Python Framework directory path. -# strErrMsg - Error description on task failure. -# Throws: None. 
-#-- - - -def get_framework_python_dir_other_platforms(vDictArgs): - dbg = utilsDebug.CDebugFnVerbose( - "Python script get_framework_python_dir_other_platform()") - bOk = True - strWkDir = "" - strErrMsg = "" - bDbg = "-d" in vDictArgs - - bMakeFileCalled = "-m" in vDictArgs - if bMakeFileCalled: - dbg.dump_text("Built by LLVM") - return get_framework_python_dir_windows(vDictArgs) - else: - dbg.dump_text("Built by XCode") - # We are being built by XCode, so all the lldb Python files can go - # into the LLDB.framework/Resources/Python subdirectory. - strWkDir = vDictArgs["--targetDir"] - strWkDir = os.path.join(strWkDir, "LLDB.framework") - if os.path.exists(strWkDir): - if bDbg: - print((strMsgFoundLldbFrameWkDir % strWkDir)) - strWkDir = os.path.join(strWkDir, "Resources", "Python", "lldb") - strWkDir = os.path.normcase(strWkDir) - else: - bOk = False - strErrMsg = strErrMsgFrameWkPyDirNotExist % strWkDir - - return (bOk, strWkDir, strErrMsg) #++--------------------------------------------------------------------------- # Details: Retrieve the directory path for Python's dist_packages/ @@ -694,19 +614,8 @@ def get_framework_python_dir(vDictArgs): dbg = utilsDebug.CDebugFnVerbose( "Python script get_framework_python_dir()") bOk = True - strWkDir = "" strErrMsg = "" - - eOSType = utilsOsType.determine_os_type() - if eOSType == utilsOsType.EnumOsType.Unknown: - bOk = False - strErrMsg = strErrMsgOsTypeUnknown - elif eOSType == utilsOsType.EnumOsType.Windows: - bOk, strWkDir, strErrMsg = get_framework_python_dir_windows(vDictArgs) - else: - bOk, strWkDir, strErrMsg = get_framework_python_dir_other_platforms( - vDictArgs) - + strWkDir = os.path.normpath(vDictArgs["--lldbPythonPath"]) return (bOk, strWkDir, strErrMsg) #++--------------------------------------------------------------------------- diff --git a/lldb/scripts/finishSwigWrapperClasses.py b/lldb/scripts/finishSwigWrapperClasses.py index 4d08b86e9e473..7e9166bf75ec4 100644 --- 
a/lldb/scripts/finishSwigWrapperClasses.py +++ b/lldb/scripts/finishSwigWrapperClasses.py @@ -179,6 +179,7 @@ def validate_arguments(vArgv): "prefix=", "cmakeBuildConfiguration=", "lldbLibDir=", + "lldbPythonPath=", "argsFile", "useSystemSix"] dictArgReq = {"-h": "o", # o = optional, m = mandatory @@ -191,7 +192,8 @@ def validate_arguments(vArgv): "--cmakeBuildConfiguration": "o", "--lldbLibDir": "o", "--argsFile": "o", - "--useSystemSix": "o"} + "--useSystemSix": "o", + "--lldbPythonPath": "m"} # Check for mandatory parameters nResult, dictArgs, strMsg = utilsArgsParse.parse(vArgv, strListArgs, @@ -293,11 +295,9 @@ def run_post_process_for_each_script_supported(vDictArgs): # Iterate script directory find any script language directories for scriptLang in listDirs: - # __pycache__ is a magic directory in Python 3 that holds .pyc files - if scriptLang != "__pycache__" and scriptLang != "swig_bot_lib": - dbg.dump_text("Executing language script for \'%s\'" % scriptLang) - nResult, strStatusMsg = run_post_process( - scriptLang, strFinishFileName, vDictArgs) + dbg.dump_text("Executing language script for \'%s\'" % scriptLang) + nResult, strStatusMsg = run_post_process( + scriptLang, strFinishFileName, vDictArgs) if nResult < 0: break @@ -337,13 +337,6 @@ def main(vArgv): if gbDbgFlag: print_out_input_parameters(dictArgs) - # Check to see if we were called from the Makefile system. If we were, check - # if the caller wants SWIG to generate a dependency file. 
- # Not used in this program, but passed through to the language script file - # called by this program - global gbMakeFileFlag - gbMakeFileFlag = "-m" in dictArgs - nResult, strMsg = run_post_process_for_each_script_supported(dictArgs) program_exit(nResult, strMsg) diff --git a/lldb/scripts/get_relative_lib_dir.py b/lldb/scripts/get_relative_lib_dir.py deleted file mode 100644 index 3afeeafd7b482..0000000000000 --- a/lldb/scripts/get_relative_lib_dir.py +++ /dev/null @@ -1,44 +0,0 @@ -import distutils.sysconfig -import os -import platform -import re -import sys - - -def get_python_relative_libdir(): - """Returns the appropropriate python libdir relative to the build directory. - - @param exe_path the path to the lldb executable - - @return the python path that needs to be added to sys.path (PYTHONPATH) - in order to find the lldb python module. - """ - if platform.system() != 'Linux': - return None - - # We currently have a bug in lldb -P that does not account for - # architecture variants in python paths for - # architecture-specific modules. Handle the lookup here. - # When that bug is fixed, we should just ask lldb for the - # right answer always. - arch_specific_libdir = distutils.sysconfig.get_python_lib(True, False) - split_libdir = arch_specific_libdir.split(os.sep) - lib_re = re.compile(r"^lib.*$") - - for i in range(len(split_libdir)): - match = lib_re.match(split_libdir[i]) - if match is not None: - # We'll call this the relative root of the lib dir. - # Things like RHEL will have an arch-specific python - # lib dir, which isn't 'lib' on x86_64. - return os.sep.join(split_libdir[i:]) - # Didn't resolve it. 
- return None - -if __name__ == '__main__': - lib_dir = get_python_relative_libdir() - if lib_dir is not None: - sys.stdout.write(lib_dir) - sys.exit(0) - else: - sys.exit(1) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt b/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt index f7360d7288919..2c5071bc76554 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt +++ b/lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt @@ -1,15 +1,7 @@ -if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows") - # Call a python script to gather the arch-specific libdir for - # modules like the lldb module. - execute_process( - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../scripts/get_relative_lib_dir.py - RESULT_VARIABLE get_libdir_status - OUTPUT_VARIABLE relative_libdir - ) - if (get_libdir_status EQUAL 0) - add_definitions(-DLLDB_PYTHON_RELATIVE_LIBDIR="${relative_libdir}") - endif() +if(NOT LLDB_PYTHON_RELATIVE_PATH) + message(FATAL_ERROR "LLDB_PYTHON_RELATIVE_PATH is not set.") endif() +add_definitions(-DLLDB_PYTHON_RELATIVE_LIBDIR="${LLDB_PYTHON_RELATIVE_PATH}") add_lldb_library(lldbPluginScriptInterpreterPython PLUGIN PythonDataObjects.cpp diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index e5000bfd4ae50..54dc60c3ed04e 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -305,39 +305,20 @@ void ScriptInterpreterPython::ComputePythonDirForApple( auto rend = llvm::sys::path::rend(path_ref); auto framework = std::find(rbegin, rend, "LLDB.framework"); if (framework == rend) { - ComputePythonDirForPosix(path); + ComputePythonDir(path); return; } path.resize(framework - rend); llvm::sys::path::append(path, style, "LLDB.framework", "Resources", "Python"); } -void 
ScriptInterpreterPython::ComputePythonDirForPosix( +void ScriptInterpreterPython::ComputePythonDir( llvm::SmallVectorImpl &path) { - auto style = llvm::sys::path::Style::posix; -#if defined(LLDB_PYTHON_RELATIVE_LIBDIR) // Build the path by backing out of the lib dir, then building with whatever // the real python interpreter uses. (e.g. lib for most, lib64 on RHEL - // x86_64). - llvm::sys::path::remove_filename(path, style); - llvm::sys::path::append(path, style, LLDB_PYTHON_RELATIVE_LIBDIR); -#else - llvm::sys::path::append(path, style, - "python" + llvm::Twine(PY_MAJOR_VERSION) + "." + - llvm::Twine(PY_MINOR_VERSION), - "site-packages"); -#endif -} - -void ScriptInterpreterPython::ComputePythonDirForWindows( - llvm::SmallVectorImpl &path) { - auto style = llvm::sys::path::Style::windows; - llvm::sys::path::remove_filename(path, style); - llvm::sys::path::append(path, style, "lib", "site-packages"); - - // This will be injected directly through FileSpec.GetDirectory().SetString(), - // so we need to normalize manually. - std::replace(path.begin(), path.end(), '\\', '/'); + // x86_64, or bin on Windows). 
+ llvm::sys::path::remove_filename(path); + llvm::sys::path::append(path, LLDB_PYTHON_RELATIVE_LIBDIR); } FileSpec ScriptInterpreterPython::GetPythonDir() { @@ -350,11 +331,10 @@ FileSpec ScriptInterpreterPython::GetPythonDir() { #if defined(__APPLE__) ComputePythonDirForApple(path); -#elif defined(_WIN32) - ComputePythonDirForWindows(path); #else - ComputePythonDirForPosix(path); + ComputePythonDir(path); #endif + llvm::sys::path::native(path); spec.GetDirectory().SetString(path); return spec; }(); diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h index 24941ec774521..33ae308041b2e 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h @@ -48,8 +48,7 @@ class ScriptInterpreterPython : public ScriptInterpreter, protected: static void ComputePythonDirForApple(llvm::SmallVectorImpl &path); - static void ComputePythonDirForPosix(llvm::SmallVectorImpl &path); - static void ComputePythonDirForWindows(llvm::SmallVectorImpl &path); + static void ComputePythonDir(llvm::SmallVectorImpl &path); }; } // namespace lldb_private From 4fde20f4e41231e122a4be0b1d0d9865a3351797 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 7 Oct 2019 23:57:11 +0000 Subject: [PATCH 192/254] [clang] Accept -ftrivial-auto-var-init in clang-cl Reviewers: eugenis, rnk Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68608 llvm-svn: 373992 --- clang/include/clang/Driver/Options.td | 4 ++-- clang/test/Driver/cl-options.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1c4794d1ac745..8346d149dcc01 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1715,10 +1715,10 @@ def fstack_protector : Flag<["-"], 
"fstack-protector">, Group, "alloca, which are of greater size than ssp-buffer-size (default: 8 bytes). " "All variable sized calls to alloca are considered vulnerable">; def ftrivial_auto_var_init : Joined<["-"], "ftrivial-auto-var-init=">, Group, - Flags<[CC1Option]>, HelpText<"Initialize trivial automatic stack variables: uninitialized (default)" + Flags<[CC1Option, CoreOption]>, HelpText<"Initialize trivial automatic stack variables: uninitialized (default)" " | pattern">, Values<"uninitialized,pattern">; def enable_trivial_var_init_zero : Joined<["-"], "enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">, - Flags<[CC1Option]>, + Flags<[CC1Option, CoreOption]>, HelpText<"Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark">; def fstandalone_debug : Flag<["-"], "fstandalone-debug">, Group, Flags<[CoreOption]>, HelpText<"Emit full debug info for all types used by the program">; diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index d11d46253c9ea..354ed998f6f5a 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -653,6 +653,8 @@ // RUN: -fcs-profile-generate \ // RUN: -fcs-profile-generate=dir \ // RUN: -ftime-trace \ +// RUN: -ftrivial-auto-var-init=zero \ +// RUN: -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang \ // RUN: --version \ // RUN: -Werror /Zs -- %s 2>&1 From 2e2c93476282990ae6b4845578ced9bf51d43e43 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 8 Oct 2019 00:00:30 +0000 Subject: [PATCH 193/254] [msan] Add interceptors: crypt, crypt_r. 
Reviewers: vitalybuka Subscribers: srhines, #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D68431 llvm-svn: 373993 --- .../sanitizer_common_interceptors.inc | 37 +++++++++++++++++++ .../sanitizer_platform_interceptors.h | 2 + .../sanitizer_platform_limits_posix.cpp | 2 + .../sanitizer_platform_limits_posix.h | 1 + .../TestCases/Linux/crypt_r.cpp | 37 +++++++++++++++++++ .../TestCases/Posix/crypt.cpp | 26 +++++++++++++ 6 files changed, 105 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/crypt_r.cpp create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 587b1ea227226..7cae94559075e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -9573,6 +9573,41 @@ INTERCEPTOR(SSIZE_T, getrandom, void *buf, SIZE_T buflen, unsigned int flags) { #define INIT_GETRANDOM #endif +#if SANITIZER_INTERCEPT_CRYPT +INTERCEPTOR(char *, crypt, char *key, char *salt) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, crypt, key, salt); + COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1); + COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1); + char *res = REAL(crypt)(key, salt); + if (res != nullptr) + COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1); + return res; +} +#define INIT_CRYPT COMMON_INTERCEPT_FUNCTION(crypt); +#else +#define INIT_CRYPT +#endif + +#if SANITIZER_INTERCEPT_CRYPT_R +INTERCEPTOR(char *, crypt_r, char *key, char *salt, void *data) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, crypt_r, key, salt, data); + COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1); + COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1); + char *res = 
REAL(crypt_r)(key, salt, data); + if (res != nullptr) { + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, + __sanitizer::struct_crypt_data_sz); + COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1); + } + return res; +} +#define INIT_CRYPT_R COMMON_INTERCEPT_FUNCTION(crypt_r); +#else +#define INIT_CRYPT_R +#endif + static void InitializeCommonInterceptors() { #if SI_POSIX static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1]; @@ -9871,6 +9906,8 @@ static void InitializeCommonInterceptors() { INIT_GETUSERSHELL; INIT_SL_INIT; INIT_GETRANDOM; + INIT_CRYPT; + INIT_CRYPT_R; INIT___PRINTF_CHK; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 36885da98a07b..54a1699f5c44e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -566,6 +566,8 @@ #define SANITIZER_INTERCEPT_FDEVNAME SI_FREEBSD #define SANITIZER_INTERCEPT_GETUSERSHELL (SI_POSIX && !SI_ANDROID) #define SANITIZER_INTERCEPT_SL_INIT (SI_FREEBSD || SI_NETBSD) +#define SANITIZER_INTERCEPT_CRYPT (SI_POSIX && !SI_ANDROID) +#define SANITIZER_INTERCEPT_CRYPT_R (SI_LINUX && !SI_ANDROID) #define SANITIZER_INTERCEPT_GETRANDOM (SI_LINUX && __GLIBC_PREREQ(2, 25)) #define SANITIZER_INTERCEPT___CXA_ATEXIT SI_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index f50f31dcd66e7..84058c70811fa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -140,6 +140,7 @@ typedef struct user_fpregs elf_fpregset_t; #include #include #include +#include #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if SANITIZER_ANDROID @@ -240,6 +241,7 @@ namespace __sanitizer { unsigned struct_ustat_sz = 
SIZEOF_STRUCT_USTAT; unsigned struct_rlimit64_sz = sizeof(struct rlimit64); unsigned struct_statvfs64_sz = sizeof(struct statvfs64); + unsigned struct_crypt_data_sz = sizeof(struct crypt_data); #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if SANITIZER_LINUX && !SANITIZER_ANDROID diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index b92fef395f4fd..db2c4f07b3ae4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -304,6 +304,7 @@ extern unsigned struct_msqid_ds_sz; extern unsigned struct_mq_attr_sz; extern unsigned struct_timex_sz; extern unsigned struct_statvfs_sz; +extern unsigned struct_crypt_data_sz; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_iovec { diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/crypt_r.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/crypt_r.cpp new file mode 100644 index 0000000000000..b90b13b45cd9a --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/crypt_r.cpp @@ -0,0 +1,37 @@ +// RUN: %clangxx -O0 -g %s -lcrypt -o %t && %run %t + +#include +#include +#include +#include + +#include + +int +main (int argc, char** argv) +{ + { + crypt_data cd; + cd.initialized = 0; + char *p = crypt_r("abcdef", "xz", &cd); + volatile size_t z = strlen(p); + } + { + crypt_data cd; + cd.initialized = 0; + char *p = crypt_r("abcdef", "$1$", &cd); + volatile size_t z = strlen(p); + } + { + crypt_data cd; + cd.initialized = 0; + char *p = crypt_r("abcdef", "$5$", &cd); + volatile size_t z = strlen(p); + } + { + crypt_data cd; + cd.initialized = 0; + char *p = crypt_r("abcdef", "$6$", &cd); + volatile size_t z = strlen(p); + } +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp new file mode 100644 index 
0000000000000..7b36741b6ba29 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp @@ -0,0 +1,26 @@ +// RUN: %clangxx -O0 -g %s -o %t -lcrypt && %run %t + +#include +#include +#include + +int +main (int argc, char** argv) +{ + { + char *p = crypt("abcdef", "xz"); + volatile size_t z = strlen(p); + } + { + char *p = crypt("abcdef", "$1$"); + volatile size_t z = strlen(p); + } + { + char *p = crypt("abcdef", "$5$"); + volatile size_t z = strlen(p); + } + { + char *p = crypt("abcdef", "$6$"); + volatile size_t z = strlen(p); + } +} From 4bddca306a451f2577ec62dd73c30b5e91a8fb32 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 00:13:59 +0000 Subject: [PATCH 194/254] [MachO] Fix symbol merging during symtab parsing. The symtab parser in ObjectFileMachO has logic to coalesce debug (STAB) and non-debug symbols, based on the address and the symbol name for static (STSYM) and global symbols (GSYM) respectively. It makes the assumption that the debug variant is always encountered first. Rather than creating a second entry in the symbol table for the non-debug symbol, the latter gets merged into the existing debug symbol. This breaks when the linker emits the non-debug symbol first. We'd end up with two entries in the symbol table, each containing part of the information LLDB relies on. Indeed, commenting out the merging logic breaks the test suite spectacularly. This patch solves that problem by always parsing the debug symbols first. This guarantees that the assumption for merging holds. I'm not particularly happy with adding a lambda, but after numerous attempts this is the best solution I could come up with. The symtab parsing logic is pretty complex in that it touches a lot of things. I've experienced first hand that it's very easy to break things. I believe this approach strikes a balance between fixing the issue while limiting the risk of regressions. 
Differential revision: https://reviews.llvm.org/D68536 llvm-svn: 373994 --- lldb/lit/ObjectFile/MachO/symtab.yaml | 699 ++++++++++++++++++ .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 78 +- 2 files changed, 754 insertions(+), 23 deletions(-) create mode 100644 lldb/lit/ObjectFile/MachO/symtab.yaml diff --git a/lldb/lit/ObjectFile/MachO/symtab.yaml b/lldb/lit/ObjectFile/MachO/symtab.yaml new file mode 100644 index 0000000000000..e50ba6b6c224a --- /dev/null +++ b/lldb/lit/ObjectFile/MachO/symtab.yaml @@ -0,0 +1,699 @@ +# Tests that the symbol table properly merges the two entries for +# global_constant, even when the debug symbol comes last. +# RUN: mkdir -p %t +# RUN: yaml2obj %s -o %t/a.out +# RUN: %lldb %t/a.out -o "target modules dump symtab a.out" | FileCheck %s +# CHECK: global_constant +# CHECK-NOT: global_constant +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 20 + sizeofcmds: 1992 + flags: 0x00200085 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 552 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 6 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000100000DE0 + size: 383 + offset: 0x00000DE0 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 
554889E54883EC30488D054112000031C948C7052C1200005704000048C70529120000AE080000488B151A120000897DE44889C7488D45E8488975D84889C6B821000000488955D04889C2E830010000488B7DD0488D35F5110000E840000000488D7DE8E81D01000031C04883C4305DC3662E0F1F8400000000000F1F440000554889E548C745F000000000488975F848897DF05DC3662E0F1F840000000000554889E54883EC3048C745F80000000048C745F00000000048C745E80000000048897DF8488975F0488B06488945E848897DD8488945E0488D3D62110000E81D000000488D7DD84889C6E891FFFFFF4883C4305DC3662E0F1F840000000000904883EC284889F8488B0F4883F9000F9CC2F6C2014889CE48897C2420488944241848894C24104889742408750A488B4424084883C428C3488B44241048C1F82048F7D8488B4C241089CA4863F2488B7C24204801F731D289D6488934244889C6488B1424488B0C24E81F0000004889C1488B54241848890248894C2408EBAE + - sectname: __stubs + segname: __TEXT + addr: 0x0000000100000F60 + size: 18 + offset: 0x00000F60 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000408 + reserved1: 0x00000000 + reserved2: 0x00000006 + reserved3: 0x00000000 + content: FF259A100000FF259C100000FF259E100000 + - sectname: __stub_helper + segname: __TEXT + addr: 0x0000000100000F74 + size: 46 + offset: 0x00000F74 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 4C8D1D9D1000004153FF257D000000906800000000E9E6FFFFFF6819000000E9DCFFFFFF6830000000E9D2FFFFFF + - sectname: __swift5_typeref + segname: __TEXT + addr: 0x0000000100000FA2 + size: 7 + offset: 0x00000FA2 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 53695F53697400 + - sectname: __const + segname: __TEXT + addr: 0x0000000100000FAA + size: 2 + offset: 0x00000FAA + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0300' + - sectname: __unwind_info + segname: __TEXT + addr: 0x0000000100000FAC + size: 80 
+ offset: 0x00000FAC + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 010000001C000000000000001C000000000000001C00000002000000E00D00003400000034000000600F00000000000034000000030000000C0002001400020000000001000100000000060200000001 + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __DATA_CONST + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 2 + flags: 16 + Sections: + - sectname: __got + segname: __DATA_CONST + addr: 0x0000000100001000 + size: 8 + offset: 0x00001000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000006 + reserved1: 0x00000003 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000000000000' + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x0000000100001008 + size: 8 + offset: 0x00001008 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000040070105' + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 3 + flags: 0 + Sections: + - sectname: __la_symbol_ptr + segname: __DATA + addr: 0x0000000100002000 + size: 24 + offset: 0x00002000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000007 + reserved1: 0x00000004 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 840F0000010000008E0F000001000000980F000001000000 + - sectname: __data + segname: __DATA + addr: 0x0000000100002018 + size: 16 + offset: 0x00002018 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 000000000000000082EFFFFFFAFFFFFF + - sectname: __common + segname: __DATA + addr: 0x0000000100002028 + size: 16 + offset: 0x00000000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 
0x00000001 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294979584 + vmsize: 4096 + fileoff: 12288 + filesize: 1924 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 12288 + rebase_size: 8 + bind_off: 12296 + bind_size: 24 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 12320 + lazy_bind_size: 96 + export_off: 12416 + export_size: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 12472 + nsyms: 41 + stroff: 13156 + strsize: 1056 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 35 + iextdefsym: 35 + nextdefsym: 2 + iundefsym: 37 + nundefsym: 4 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 13128 + nindirectsyms: 7 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + PayloadString: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: A89F9D23-8190-3946-A70B-B8E833E68640 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 659200 + sdk: 659200 + ntools: 1 + Tools: + - tool: 3 + version: 36176384 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 3552 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 14942208 + compatibility_version: 65536 + PayloadString: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 83951616 + compatibility_version: 65536 + PayloadString: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_LOAD_DYLIB + cmdsize: 64 + dylib: + name: 24 + timestamp: 2 + current_version: 0 + compatibility_version: 65536 + PayloadString: '/usr/lib/swift/libswiftCore.dylib' + ZeroPadBytes: 7 + - cmd: LC_RPATH + cmdsize: 136 + path: 12 + PayloadString: 
'/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/swift-macosx-x86_64/lib/swift/macosx' + ZeroPadBytes: 2 + - cmd: LC_RPATH + cmdsize: 128 + path: 12 + PayloadString: '/Applications/XcodeY.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.sdk/usr/lib/swift' + ZeroPadBytes: 2 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 12464 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 12472 + datasize: 0 +LinkEditData: + RebaseOpcodes: + - Opcode: REBASE_OPCODE_SET_TYPE_IMM + Imm: 1 + - Opcode: REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ExtraData: + - 0x0000000000000000 + - Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES + Imm: 3 + - Opcode: REBASE_OPCODE_DONE + Imm: 0 + BindOpcodes: + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 2 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: dyld_stub_binder + - Opcode: BIND_OPCODE_SET_TYPE_IMM + Imm: 1 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 2 + ULEBExtraData: + - 0x0000000000000000 + Symbol: '' + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + LazyBindOpcodes: + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000000 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_beginAccess + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000008 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_endAccess + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: 
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000010 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_getTypeByMangledNameInContext + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 5 + Name: _ + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 2 + NodeOffset: 33 + Name: _mh_execute_header + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 37 + Name: main + Flags: 0x0000000000000000 + Address: 0x0000000000000DE0 + Other: 0x0000000000000000 + ImportName: '' + NameList: + - n_strx: 118 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 133 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 151 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294971104 + - n_strx: 199 + n_type: 0x1E + n_sect: 4 + n_desc: 128 + n_value: 4294971298 + - n_strx: 216 + n_type: 0x1E + n_sect: 5 + n_desc: 128 + n_value: 4294971306 + - n_strx: 244 + n_type: 0x0E + n_sect: 10 + n_desc: 0 + n_value: 4294975512 + - n_strx: 259 + n_type: 0x1E + n_sect: 10 + n_desc: 128 + n_value: 4294975520 + - n_strx: 271 + n_type: 0x1E + n_sect: 11 + n_desc: 0 + n_value: 4294975528 + - n_strx: 298 + n_type: 0x1E + n_sect: 11 + n_desc: 0 + 
n_value: 4294975536 + - n_strx: 325 + n_type: 0x32 + n_sect: 0 + n_desc: 0 + n_value: 1570141141 + - n_strx: 540 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 633 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 644 + n_type: 0x66 + n_sect: 3 + n_desc: 1 + n_value: 1570141203 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294970848 + - n_strx: 852 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294970848 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 128 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 128 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 858 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 32 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 32 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 873 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 96 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 96 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294971104 + - n_strx: 891 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294971104 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 127 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 127 + - n_strx: 939 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 956 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 984 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 996 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 1023 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 2 + n_type: 0x0F + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + 
n_value: 4294970848 + - n_strx: 28 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 47 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 64 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 101 + n_type: 0x01 + n_sect: 0 + n_desc: 512 + n_value: 0 + StringTable: + - ' ' + - __mh_execute_header + - _main + - _swift_beginAccess + - _swift_endAccess + - _swift_getTypeByMangledNameInContext + - dyld_stub_binder + - '_$s1a3useyyxlF' + - '_$s1a1fyySi_SiztF' + - ___swift_instantiateConcreteTypeFromMangledName + - _symbolic Si_Sit + - ___swift_reflection_version + - __dyld_private + - '_$sSi_SitMD' + - '_$s1a15global_constantSivp' + - '_$s1a15global_variableSivp' + - '/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/lldb-macosx-x86_64/lldb-test-build.noindex/lang/swift/variables/let/TestSwiftLetConstants.test_let_constants_dwarf/a.swiftmodule' + - '/Volumes/Jonas/internal-stable/lldb/packages/Python/lldbsuite/test/lang/swift/variables/let/' + - main.swift + - '/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/lldb-macosx-x86_64/lldb-test-build.noindex/lang/swift/variables/let/TestSwiftLetConstants.test_let_constants_dwarf/main.o' + - _main + - '_$s1a3useyyxlF' + - '_$s1a1fyySi_SiztF' + - ___swift_instantiateConcreteTypeFromMangledName + - _symbolic Si_Sit + - ___swift_reflection_version + - '_$sSi_SitMD' + - '_$s1a15global_constantSivp' + - '_$s1a15global_variableSivp' + - '' + - '' + - '' + - '' + - '' + - '' +... 
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index d109dc1c4905c..66ac2908de4be 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2038,19 +2038,22 @@ UUID ObjectFileMachO::GetSharedCacheUUID(FileSpec dyld_shared_cache, return dsc_uuid; } -static bool ParseNList(DataExtractor &nlist_data, - lldb::offset_t &nlist_data_offset, - size_t nlist_byte_size, struct nlist_64 &nlist) { +static llvm::Optional +ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, + size_t nlist_byte_size) { + struct nlist_64 nlist; if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) - return false; + return {}; nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); - return true; + return nlist; } +enum { DebugSymbols = true, NonDebugSymbols = false }; + size_t ObjectFileMachO::ParseSymtab() { static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); Timer scoped_timer(func_cat, "ObjectFileMachO::ParseSymtab () module = %s", @@ -3680,10 +3683,18 @@ size_t ObjectFileMachO::ParseSymtab() { typedef std::map SymbolIndexToName; UndefinedNameToDescMap undefined_name_to_desc; SymbolIndexToName reexport_shlib_needs_fixup; - for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { - struct nlist_64 nlist; - if (!ParseNList(nlist_data, nlist_data_offset, nlist_byte_size, nlist)) - break; + + // Symtab parsing is a huge mess. Everything is entangled and the code + // requires access to a ridiculous amount of variables. 
LLDB depends + // heavily on the proper merging of symbols and to get that right we need + // to make sure we have parsed all the debug symbols first. Therefore we + // invoke the lambda twice, once to parse only the debug symbols and then + // once more to parse the remaining symbols. + auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, + bool debug_only) { + const bool is_debug = ((nlist.n_type & N_STAB) != 0); + if (is_debug != debug_only) + return true; const char *symbol_name_non_abi_mangled = nullptr; const char *symbol_name = nullptr; @@ -3699,7 +3710,7 @@ size_t ObjectFileMachO::ParseSymtab() { "0x%x in %s, ignoring symbol\n", nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath().c_str()); - continue; + return true; } if (symbol_name[0] == '\0') symbol_name = nullptr; @@ -3719,7 +3730,6 @@ size_t ObjectFileMachO::ParseSymtab() { bool demangled_is_synthesized = false; bool set_value = true; - const bool is_debug = ((nlist.n_type & N_STAB) != 0); assert(sym_idx < num_syms); sym[sym_idx].SetDebug(is_debug); @@ -4270,7 +4280,7 @@ size_t ObjectFileMachO::ParseSymtab() { if (!add_nlist) { sym[sym_idx].Clear(); - continue; + return true; } uint64_t symbol_value = nlist.n_value; @@ -4361,7 +4371,6 @@ size_t ObjectFileMachO::ParseSymtab() { range; range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); if (range.first != range.second) { - bool found_it = false; for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, @@ -4378,12 +4387,9 @@ size_t ObjectFileMachO::ParseSymtab() { resolver_addresses.end()) sym[pos->second].SetType(eSymbolTypeResolver); sym[sym_idx].Clear(); - found_it = true; - break; + return true; } } - if (found_it) - continue; } else { if (resolver_addresses.find(nlist.n_value) != resolver_addresses.end()) @@ -4401,7 +4407,6 @@ size_t ObjectFileMachO::ParseSymtab() { range; range = 
N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); if (range.first != range.second) { - bool found_it = false; for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, @@ -4415,12 +4420,9 @@ size_t ObjectFileMachO::ParseSymtab() { sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); sym[sym_idx].Clear(); - found_it = true; - break; + return true; } } - if (found_it) - continue; } else { // Combine N_GSYM stab entries with the non stab symbol. const char *gsym_name = sym[sym_idx] @@ -4443,7 +4445,7 @@ size_t ObjectFileMachO::ParseSymtab() { // the symbol table. sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); sym[sym_idx].Clear(); - continue; + return true; } } } @@ -4467,6 +4469,36 @@ size_t ObjectFileMachO::ParseSymtab() { sym[sym_idx].SetDemangledNameIsSynthesized(true); ++sym_idx; + return true; + }; + + // First parse all the nlists but don't process them yet. See the next + // comment for an explanation why. + std::vector nlists; + nlists.reserve(symtab_load_command.nsyms); + for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { + if (auto nlist = + ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) + nlists.push_back(*nlist); + else + break; + } + + // Now parse all the debug symbols. This is needed to merge non-debug + // symbols in the next step. Non-debug symbols are always coalesced into + // the debug symbol. Doing this in one step would mean that some symbols + // won't be merged. + nlist_idx = 0; + for (auto &nlist : nlists) { + if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) + break; + } + + // Finally parse all the non debug symbols. 
+ nlist_idx = 0; + for (auto &nlist : nlists) { + if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) + break; } for (const auto &pos : reexport_shlib_needs_fixup) { From 6f23e5f6d52b21d479c1faed6054c545f24dff27 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 00:14:02 +0000 Subject: [PATCH 195/254] [CMake] Remove stale comment llvm-svn: 373995 --- lldb/test/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 41dd30ec77c08..985592f2b20bc 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -128,8 +128,6 @@ endif() set(LLDB_DOTEST_ARGS ${LLDB_TEST_COMMON_ARGS};${LLDB_TEST_USER_ARGS}) set_property(GLOBAL PROPERTY LLDB_DOTEST_ARGS_PROPERTY ${LLDB_DOTEST_ARGS}) -# If tests crash cause LLDB to crash, or things are otherwise unstable, or if machine-parsable -# output is desired (i.e. in continuous integration contexts) check-lldb-single is a better target. add_custom_target(check-lldb) add_dependencies(check-lldb lldb-test-deps) set_target_properties(check-lldb PROPERTIES FOLDER "lldb misc") From 37cf39df20825980adf55143005b553bb7e12047 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 00:21:34 +0000 Subject: [PATCH 196/254] [CMake] Track test dependencies with add_lldb_test_dependency I often use `ninja lldb-test-deps` to build all the test dependencies before running a subset of the tests with `lit --filter`. This functionality seems to break relatively often because test dependencies are tracked in an ad-hoc way acrooss cmake files. This patch adds a helper function `add_lldb_test_dependency` to unify test dependency tracking by adding dependencies to lldb-test-deps. 
Differential revision: https://reviews.llvm.org/D68612 llvm-svn: 373996 --- lldb/CMakeLists.txt | 46 +++++++++++++-------------- lldb/cmake/modules/AddLLDB.cmake | 4 +++ lldb/lit/CMakeLists.txt | 14 +++----- lldb/test/CMakeLists.txt | 8 ++--- lldb/unittests/CMakeLists.txt | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 2 +- 6 files changed, 37 insertions(+), 38 deletions(-) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 90732cf9547ab..48b6f694460d1 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -109,40 +109,50 @@ if(LLDB_INCLUDE_TESTS) message(FATAL_ERROR "LLDB test compilers not specified. Tests will not run.") endif() - set(LLDB_TEST_DEPS lldb) + add_custom_target(lldb-test-deps) + set_target_properties(lldb-test-deps PROPERTIES FOLDER "lldb misc") + add_lldb_test_dependency(lldb) + + # lldb-test is an hard dependency for the testsuite. + add_lldb_test_dependency(lldb-test) # darwin-debug is an hard dependency for the testsuite. if (CMAKE_SYSTEM_NAME MATCHES "Darwin") - list(APPEND LLDB_TEST_DEPS darwin-debug) + add_lldb_test_dependency(darwin-debug) endif() - # lldb-test is an hard dependency for the testsuite. - list(APPEND LLDB_TEST_DEPS lldb-test) - if(TARGET lldb-server) - list(APPEND LLDB_TEST_DEPS lldb-server) + add_lldb_test_dependency(lldb-server) endif() if(TARGET lldb-vscode) - list(APPEND LLDB_TEST_DEPS lldb-vscode) + add_lldb_test_dependency(lldb-vscode) endif() if(TARGET lldb-instr) - list(APPEND LLDB_TEST_DEPS lldb-instr) + add_lldb_test_dependency(lldb-instr) endif() if(NOT LLDB_BUILT_STANDALONE) - list(APPEND LLDB_TEST_DEPS yaml2obj) + add_lldb_test_dependency(yaml2obj) + endif() + + if(TARGET dsymutil) + add_lldb_test_dependency(dsymutil) endif() if(TARGET liblldb) - list(APPEND LLDB_TEST_DEPS liblldb) + add_lldb_test_dependency(liblldb) + endif() + + if(TARGET lldb-framework) + add_lldb_test_dependency(lldb-framework) endif() # Add dependencies if we test with the in-tree clang. 
# This works with standalone builds as they import the clang target. if(TARGET clang) - list(APPEND LLDB_TEST_DEPS clang) + add_lldb_test_dependency(clang) if(APPLE) # If we build clang, we should build libcxx. # FIXME: Standalone builds should import the cxx target as well. @@ -171,23 +181,11 @@ if(LLDB_INCLUDE_TESTS) "via `LLDB_INCLUDE_TESTS=OFF`.") endif() endif() - list(APPEND LLDB_TEST_DEPS cxx) + add_lldb_test_dependency(cxx) endif() endif() endif() - if(TARGET dsymutil) - list(APPEND LLDB_TEST_DEPS dsymutil) - endif() - - if(TARGET lldb-framework) - list(APPEND LLDB_TEST_DEPS lldb-framework) - endif() - - add_custom_target(lldb-test-deps) - add_dependencies(lldb-test-deps ${LLDB_TEST_DEPS}) - set_target_properties(lldb-test-deps PROPERTIES FOLDER "lldb misc") - add_subdirectory(test) add_subdirectory(unittests) add_subdirectory(lit) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index c6a7979279a00..f6402f94385cb 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -27,6 +27,10 @@ function(lldb_tablegen) endif() endfunction(lldb_tablegen) +function(add_lldb_test_dependency name) + add_dependencies(lldb-test-deps ${name}) +endfunction(add_lldb_test_dependency) + function(add_lldb_library name) include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR} diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt index fbd64afbb1c92..b7ba6789842a6 100644 --- a/lldb/lit/CMakeLists.txt +++ b/lldb/lit/CMakeLists.txt @@ -46,13 +46,9 @@ string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_DOTEST_ARGS " string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_LIBS_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) -list(APPEND LLDB_TEST_DEPS - LLDBUnitTests - dsymutil +add_lldb_test_dependency( lit-cpuid llc - lldb - lldb-test lli llvm-config llvm-dwarfdump @@ -64,7 +60,7 @@ list(APPEND LLDB_TEST_DEPS ) 
if(TARGET lld) - list(APPEND LLDB_TEST_DEPS lld) + add_lldb_test_dependency(lld) endif() # the value is not canonicalized within LLVM @@ -93,7 +89,7 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/lit-lldb-init) if(NOT LLDB_BUILT_STANDALONE) - list(APPEND LLDB_TEST_DEPS + add_dependencies(lldb-test-deps FileCheck count not @@ -102,7 +98,7 @@ endif() add_lit_testsuite(check-lldb-lit "Running lldb lit test suite" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${LLDB_TEST_DEPS} + DEPENDS lldb-test-deps ) set_target_properties(check-lldb-lit PROPERTIES FOLDER "lldb tests") @@ -115,5 +111,5 @@ endif() add_lit_testsuites(LLDB ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${LLDB_TEST_DEPS} + DEPENDS lldb-test-deps ) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 985592f2b20bc..f4bf7df92ae5c 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -10,7 +10,7 @@ function(add_python_test_target name test_script args comment) COMMENT "${comment}" USES_TERMINAL ) - add_dependencies(${name} ${LLDB_TEST_DEPS}) + add_dependencies(${name} lldb-test-deps) endfunction() # The default architecture with which to compile test executables is the default LLVM target @@ -109,17 +109,17 @@ if(CMAKE_HOST_APPLE) endif() message(STATUS "LLDB tests use out-of-tree debugserver: ${system_debugserver_path}") list(APPEND LLDB_TEST_COMMON_ARGS --out-of-tree-debugserver) - add_dependencies(lldb-test-deps debugserver) + add_lldb_test_dependency(debugserver) elseif(TARGET debugserver) set(debugserver_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/debugserver) message(STATUS "LLDB Tests use just-built debugserver: ${debugserver_path}") list(APPEND LLDB_TEST_COMMON_ARGS --server ${debugserver_path}) - add_dependencies(lldb-test-deps debugserver) + add_lldb_test_dependency(debugserver) elseif(TARGET lldb-server) set(lldb_server_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb-server) message(STATUS "LLDB Tests use just-built lldb-server: ${lldb_server_path}") list(APPEND LLDB_TEST_COMMON_ARGS --server 
${lldb_server_path}) - add_dependencies(lldb-test-deps lldb-server) + add_lldb_test_dependency(lldb-server) else() message(WARNING "LLDB Tests enabled, but no server available") endif() diff --git a/lldb/unittests/CMakeLists.txt b/lldb/unittests/CMakeLists.txt index 084b8bf23eaf7..22c684f4fce35 100644 --- a/lldb/unittests/CMakeLists.txt +++ b/lldb/unittests/CMakeLists.txt @@ -1,5 +1,6 @@ add_custom_target(LLDBUnitTests) set_target_properties(LLDBUnitTests PROPERTIES FOLDER "lldb tests") +add_dependencies(lldb-test-deps LLDBUnitTests) include_directories(${LLDB_SOURCE_ROOT}) include_directories(${LLDB_PROJECT_ROOT}/unittests) diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 0c61b2bf26a6c..4f1bd7304abfa 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -1,6 +1,6 @@ # Make lldb-dotest a custom target. add_custom_target(lldb-dotest) -add_dependencies(lldb-dotest ${LLDB_TEST_DEPS}) +add_dependencies(lldb-dotest lldb-test-deps) set_target_properties(lldb-dotest PROPERTIES FOLDER "lldb utils") get_property(LLDB_DOTEST_ARGS GLOBAL PROPERTY LLDB_DOTEST_ARGS_PROPERTY) From 5d10e417e97bfe7581c72460c46f83722ece6693 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Tue, 8 Oct 2019 00:26:53 +0000 Subject: [PATCH 197/254] DWIMy filterspecs for dotest.py Summary: dotest.py currently requires a filterspec to be of the form `TestCase.test_method`. This patch makes it more flexible, so you can pass `TestModule.TestCase.test_method` or `TestModule.TestCase` or `TestCase.test_method` or just `test_method`. This makes it more convenient to just copy a test name out of the terminal after running a bunch of tests and use it as a filterspec. 
Reviewers: JDevlieghere, jasonmolenda, labath Reviewed By: JDevlieghere Subscribers: jingham, lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D68545 llvm-svn: 373997 --- lldb/packages/Python/lldbsuite/test/dotest.py | 44 +++++++++++-------- .../Python/lldbsuite/test/dotest_args.py | 4 +- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 04917185de265..652a02e5ed65c 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -667,34 +667,42 @@ def visit_file(dir, name): # Thoroughly check the filterspec against the base module and admit # the (base, filterspec) combination only when it makes sense. - filterspec = None - for filterspec in configuration.filters: - # Optimistically set the flag to True. - filtered = True - module = __import__(base) - parts = filterspec.split('.') - obj = module + + def check(obj, parts): for part in parts: try: parent, obj = obj, getattr(obj, part) except AttributeError: # The filterspec has failed. - filtered = False - break - - # If filtered, we have a good filterspec. Add it. - if filtered: - # print("adding filter spec %s to module %s" % (filterspec, module)) - configuration.suite.addTests( - unittest2.defaultTestLoader.loadTestsFromName( - filterspec, module)) - continue + return False + return True + + module = __import__(base) + + def iter_filters(): + for filterspec in configuration.filters: + parts = filterspec.split('.') + if check(module, parts): + yield filterspec + elif parts[0] == base and len(parts) > 1 and check(module, parts[1:]): + yield '.'.join(parts[1:]) + else: + for key,value in module.__dict__.items(): + if check(value, parts): + yield key + '.' 
+ filterspec + + filtered = False + for filterspec in iter_filters(): + filtered = True + print("adding filter spec %s to module %s" % (filterspec, repr(module))) + tests = unittest2.defaultTestLoader.loadTestsFromName(filterspec, module) + configuration.suite.addTests(tests) # Forgo this module if the (base, filterspec) combo is invalid if configuration.filters and not filtered: return - if not filterspec or not filtered: + if not filtered: # Add the entire file's worth of tests since we're not filtered. # Also the fail-over case when the filterspec branch # (base, filterspec) combo doesn't make sense. diff --git a/lldb/packages/Python/lldbsuite/test/dotest_args.py b/lldb/packages/Python/lldbsuite/test/dotest_args.py index 8de4d8dbb2f79..4922f27c7bf21 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest_args.py +++ b/lldb/packages/Python/lldbsuite/test/dotest_args.py @@ -61,7 +61,9 @@ def create_parser(): '-f', metavar='filterspec', action='append', - help='Specify a filter, which consists of the test class name, a dot, followed by the test method, to only admit such test into the test suite') # FIXME: Example? + help=('Specify a filter, which looks like "TestModule.TestClass.test_name". '+ + 'You may also use shortened filters, such as '+ + '"TestModule.TestClass", "TestClass.test_name", or just "test_name".')) group.add_argument( '-p', metavar='pattern', From 0016b450bee87bec816f764acfb9fa6afc31c352 Mon Sep 17 00:00:00 2001 From: Haibo Huang Date: Tue, 8 Oct 2019 00:33:26 +0000 Subject: [PATCH 198/254] [lldb] Reverts part of 61f471a Seems I wrongly merged an old patch. Reverts the change related to python dir for windows. FileSpec should always contain normalized path. I.e. using '/' even in windows. 
llvm-svn: 373998 --- .../ScriptInterpreter/Python/ScriptInterpreterPython.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 54dc60c3ed04e..fefa12c70684d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -319,6 +319,12 @@ void ScriptInterpreterPython::ComputePythonDir( // x86_64, or bin on Windows). llvm::sys::path::remove_filename(path); llvm::sys::path::append(path, LLDB_PYTHON_RELATIVE_LIBDIR); + +#if defined(_WIN32) + // This will be injected directly through FileSpec.GetDirectory().SetString(), + // so we need to normalize manually. + std::replace(path.begin(), path.end(), '\\', '/'); +#endif } FileSpec ScriptInterpreterPython::GetPythonDir() { @@ -334,7 +340,6 @@ FileSpec ScriptInterpreterPython::GetPythonDir() { #else ComputePythonDir(path); #endif - llvm::sys::path::native(path); spec.GetDirectory().SetString(path); return spec; }(); From 40943b5193f22182ed9c47aea822167a17072f3d Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Tue, 8 Oct 2019 00:36:19 +0000 Subject: [PATCH 199/254] Revert "Add VFS support for sanitizers' blacklist" Fix tests on Windows for now. This reverts commit 96ac97a4213287003f08636d0c372b3f71e9cfca. 
llvm-svn: 373999 --- clang/lib/AST/ASTContext.cpp | 19 +------------------ .../sanitizer-blacklist-vfsoverlay.yaml | 15 --------------- clang/test/CodeGen/ubsan-blacklist.c | 11 ----------- 3 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 906c54194d942..a41b64ffcc81f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -72,7 +72,6 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -82,7 +81,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -828,18 +826,6 @@ static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI, llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything."); } -static std::vector -getRealPaths(llvm::vfs::FileSystem &VFS, llvm::ArrayRef Paths) { - std::vector Result; - llvm::SmallString<128> Buffer; - for (const auto &File : Paths) { - if (std::error_code EC = VFS.getRealPath(File, Buffer)) - llvm::report_fatal_error("can't open file '" + File + "': " + EC.message()); - Result.push_back(Buffer.str()); - } - return Result; -} - ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins) @@ -847,10 +833,7 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, TemplateSpecializationTypes(this_()), DependentTemplateSpecializationTypes(this_()), SubstTemplateTemplateParmPacks(this_()), SourceMgr(SM), LangOpts(LOpts), - SanitizerBL(new SanitizerBlacklist( - getRealPaths(SM.getFileManager().getVirtualFileSystem(), 
- LangOpts.SanitizerBlacklistFiles), - SM)), + SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)), XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles, LangOpts.XRayNeverInstrumentFiles, LangOpts.XRayAttrListFiles, SM)), diff --git a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml deleted file mode 100644 index df2b221897693..0000000000000 --- a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{ - 'version': 0, - 'roots': [ - { 'name': '@DIR@', 'type': 'directory', - 'contents': [ - { 'name': 'only-virtual-file.blacklist', 'type': 'file', - 'external-contents': '@REAL_FILE@' - }, - { 'name': 'invalid-virtual-file.blacklist', 'type': 'file', - 'external-contents': '@NONEXISTENT_FILE@' - } - ] - } - ] -} diff --git a/clang/test/CodeGen/ubsan-blacklist.c b/clang/test/CodeGen/ubsan-blacklist.c index d6b4b71431480..666003bd9233c 100644 --- a/clang/test/CodeGen/ubsan-blacklist.c +++ b/clang/test/CodeGen/ubsan-blacklist.c @@ -5,17 +5,6 @@ // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-func.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FILE -// RUN: rm -f %t-vfsoverlay.yaml -// RUN: rm -f %t-nonexistent.blacklist -// RUN: sed -e "s|@DIR@|%T|g" %S/Inputs/sanitizer-blacklist-vfsoverlay.yaml | sed -e "s|@REAL_FILE@|%t-func.blacklist|g" | sed -e "s|@NONEXISTENT_FILE@|%t-nonexistent.blacklist|g" > %t-vfsoverlay.yaml -// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/only-virtual-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC - -// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml 
-fsanitize-blacklist=%T/invalid-virtual-file.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID-MAPPED-FILE -// INVALID-MAPPED-FILE: invalid-virtual-file.blacklist': No such file or directory - -// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%t-nonexistent.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID -// INVALID: nonexistent.blacklist': No such file or directory - unsigned i; // DEFAULT: @hash From ce3314cf28554265cac55bfabd86a192ebc07674 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 00:44:54 +0000 Subject: [PATCH 200/254] [CMake] Add two more uses of add_lldb_test_dependency llvm-svn: 374000 --- lldb/CMakeLists.txt | 3 --- lldb/lit/CMakeLists.txt | 2 +- lldb/test/CMakeLists.txt | 8 +------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 48b6f694460d1..caf1c7b23a994 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -135,9 +135,6 @@ if(LLDB_INCLUDE_TESTS) if(NOT LLDB_BUILT_STANDALONE) add_lldb_test_dependency(yaml2obj) - endif() - - if(TARGET dsymutil) add_lldb_test_dependency(dsymutil) endif() diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt index b7ba6789842a6..3e759b0e187a3 100644 --- a/lldb/lit/CMakeLists.txt +++ b/lldb/lit/CMakeLists.txt @@ -89,7 +89,7 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/lit-lldb-init) if(NOT LLDB_BUILT_STANDALONE) - add_dependencies(lldb-test-deps + add_lldb_test_dependency( FileCheck count not diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index f4bf7df92ae5c..7080dc4520457 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -132,16 +132,10 @@ add_custom_target(check-lldb) add_dependencies(check-lldb lldb-test-deps) set_target_properties(check-lldb PROPERTIES FOLDER "lldb misc") -# If we're building with an in-tree clang, then list clang as a dependency -# to run tests. 
-if (TARGET clang) - add_dependencies(check-lldb clang) -endif() - # LLD is required to link test executables on Windows. if (CMAKE_SYSTEM_NAME MATCHES "Windows") if (TARGET lld) - add_dependencies(check-lldb lld) + add_lldb_test_dependency(lld) else () message(WARNING "lld required to test LLDB on Windows") endif () From ffc67f92514c97558a93b51b46f20c264d2d31e6 Mon Sep 17 00:00:00 2001 From: Antonio Afonso Date: Tue, 8 Oct 2019 01:10:03 +0000 Subject: [PATCH 201/254] Fixing missing lldb-scripts rename from D68370 llvm-svn: 374005 --- lldb/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index caf1c7b23a994..317643a44225f 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -237,7 +237,7 @@ if (NOT LLDB_DISABLE_PYTHON) add_dependencies(lldb-python-scripts finish_swig) install(DIRECTORY ${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/ DESTINATION ${LLDB_PYTHON_RELATIVE_PATH} - COMPONENT lldb-scripts) + COMPONENT lldb-python-scripts) if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-lldb-python-scripts COMPONENT lldb-python-scripts From 3dab5e825b8c9ef0e7d129e6aaa382b69f813c48 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Tue, 8 Oct 2019 01:13:17 +0000 Subject: [PATCH 202/254] Reland 'Add VFS support for sanitizers' blacklist' The original patch broke the test for Windows. 
Trying to fix as per Reid's suggestions outlined here: https://reviews.llvm.org/rC371663 Differential Revision: https://reviews.llvm.org/D67742 llvm-svn: 374006 --- clang/lib/AST/ASTContext.cpp | 19 ++++++++++++++++++- .../sanitizer-blacklist-vfsoverlay.yaml | 15 +++++++++++++++ clang/test/CodeGen/ubsan-blacklist.c | 11 +++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a41b64ffcc81f..906c54194d942 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -72,6 +72,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -81,6 +82,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -826,6 +828,18 @@ static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI, llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything."); } +static std::vector +getRealPaths(llvm::vfs::FileSystem &VFS, llvm::ArrayRef Paths) { + std::vector Result; + llvm::SmallString<128> Buffer; + for (const auto &File : Paths) { + if (std::error_code EC = VFS.getRealPath(File, Buffer)) + llvm::report_fatal_error("can't open file '" + File + "': " + EC.message()); + Result.push_back(Buffer.str()); + } + return Result; +} + ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins) @@ -833,7 +847,10 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, TemplateSpecializationTypes(this_()), DependentTemplateSpecializationTypes(this_()), 
SubstTemplateTemplateParmPacks(this_()), SourceMgr(SM), LangOpts(LOpts), - SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)), + SanitizerBL(new SanitizerBlacklist( + getRealPaths(SM.getFileManager().getVirtualFileSystem(), + LangOpts.SanitizerBlacklistFiles), + SM)), XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles, LangOpts.XRayNeverInstrumentFiles, LangOpts.XRayAttrListFiles, SM)), diff --git a/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml new file mode 100644 index 0000000000000..df2b221897693 --- /dev/null +++ b/clang/test/CodeGen/Inputs/sanitizer-blacklist-vfsoverlay.yaml @@ -0,0 +1,15 @@ +{ + 'version': 0, + 'roots': [ + { 'name': '@DIR@', 'type': 'directory', + 'contents': [ + { 'name': 'only-virtual-file.blacklist', 'type': 'file', + 'external-contents': '@REAL_FILE@' + }, + { 'name': 'invalid-virtual-file.blacklist', 'type': 'file', + 'external-contents': '@NONEXISTENT_FILE@' + } + ] + } + ] +} diff --git a/clang/test/CodeGen/ubsan-blacklist.c b/clang/test/CodeGen/ubsan-blacklist.c index 666003bd9233c..61d33c983e619 100644 --- a/clang/test/CodeGen/ubsan-blacklist.c +++ b/clang/test/CodeGen/ubsan-blacklist.c @@ -5,6 +5,17 @@ // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-func.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FILE +// RUN: rm -f %t-vfsoverlay.yaml +// RUN: rm -f %t-nonexistent.blacklist +// RUN: sed -e "s|@DIR@|%/T|g" %S/Inputs/sanitizer-blacklist-vfsoverlay.yaml | sed -e "s|@REAL_FILE@|%/t-func.blacklist|g" | sed -e "s|@NONEXISTENT_FILE@|%/t-nonexistent.blacklist|g" > %t-vfsoverlay.yaml +// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/only-virtual-file.blacklist 
-emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/invalid-virtual-file.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID-MAPPED-FILE +// INVALID-MAPPED-FILE: invalid-virtual-file.blacklist': No such file or directory + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%t-nonexistent.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID +// INVALID: nonexistent.blacklist': No such file or directory + unsigned i; // DEFAULT: @hash From 68491f50720de7da61866a478fa1971baa68e4e9 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Tue, 8 Oct 2019 01:16:29 +0000 Subject: [PATCH 203/254] test fix: TestLoadUsingPaths should use realpath Summary: TestLoadUsingPaths will fail if the build directory has symlinks in its path, because the real paths reported by the debugger won't match the symlink-laden paths it's expecting. This can be solved just by using os.path.realpath on the base path for the test. Reviewers: JDevlieghere, jasonmolenda, labath Reviewed By: JDevlieghere Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D68618 llvm-svn: 374007 --- .../test/functionalities/load_using_paths/TestLoadUsingPaths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/load_using_paths/TestLoadUsingPaths.py b/lldb/packages/Python/lldbsuite/test/functionalities/load_using_paths/TestLoadUsingPaths.py index 0ffce6213ef4e..e9cafba8932bf 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/load_using_paths/TestLoadUsingPaths.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/load_using_paths/TestLoadUsingPaths.py @@ -33,7 +33,7 @@ def setUp(self): ext = 'dylib' self.lib_name = 'libloadunload.' 
+ ext - self.wd = self.getBuildDir() + self.wd = os.path.realpath(self.getBuildDir()) self.hidden_dir = os.path.join(self.wd, 'hidden') self.hidden_lib = os.path.join(self.hidden_dir, self.lib_name) From e21399b02e2651eb461301452355fc821d28712b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 01:16:59 +0000 Subject: [PATCH 204/254] Revert "ProcessInstanceInfoMatch: Don't match processes with no name if a name match was requested" This breaks TestProcessAttach and TestHelloWorld on Darwin. llvm-svn: 374008 --- lldb/source/Utility/ProcessInfo.cpp | 2 +- .../Utility/ProcessInstanceInfoTest.cpp | 17 ----------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/lldb/source/Utility/ProcessInfo.cpp b/lldb/source/Utility/ProcessInfo.cpp index 6be47d377a2af..832e5efae29c8 100644 --- a/lldb/source/Utility/ProcessInfo.cpp +++ b/lldb/source/Utility/ProcessInfo.cpp @@ -244,7 +244,7 @@ void ProcessInstanceInfo::DumpAsTableRow(Stream &s, UserIDResolver &resolver, } bool ProcessInstanceInfoMatch::NameMatches(const char *process_name) const { - if (m_name_match_type == NameMatch::Ignore) + if (m_name_match_type == NameMatch::Ignore || process_name == nullptr) return true; const char *match_name = m_match_info.GetName(); if (!match_name) diff --git a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp index 1d363ac80a365..73978836c5be5 100644 --- a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp +++ b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp @@ -91,20 +91,3 @@ TEST(ProcessInstanceInfo, DumpTable_invalidUID) { )", s.GetData()); } - -TEST(ProcessInstanceInfoMatch, Name) { - ProcessInstanceInfo info_bar, info_empty; - info_bar.GetExecutableFile().SetFile("/foo/bar", FileSpec::Style::posix); - - ProcessInstanceInfoMatch match; - match.SetNameMatchType(NameMatch::Equals); - match.GetProcessInfo().GetExecutableFile().SetFile("bar", - FileSpec::Style::posix); - - 
EXPECT_TRUE(match.Matches(info_bar)); - EXPECT_FALSE(match.Matches(info_empty)); - - match.GetProcessInfo().GetExecutableFile() = FileSpec(); - EXPECT_TRUE(match.Matches(info_bar)); - EXPECT_TRUE(match.Matches(info_empty)); -} From cb194057b9d6d1cd68b3aa98b59c6cb323293d51 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Tue, 8 Oct 2019 01:31:02 +0000 Subject: [PATCH 205/254] [LitConfig] Silenced notes/warnings on quiet. Lit has a "quiet" option, -q, which is documented to "suppress no error output". Previously, LitConfig displayed notes and warnings when the quiet option was specified. The result was that it was not possible to get only pertinent file/line information to be used by an editor to jump to the location where checks were failing without passing a number of unhelpful locations first. Here, the implementations of LitConfig.note and LitConfig.warning are modified to account for the quiet flag and avoid displaying if the flag has indeed been set. Patch by Nate Chandler Reviewed by yln Differential Revision: https://reviews.llvm.org/D68044 llvm-svn: 374009 --- llvm/utils/lit/lit/LitConfig.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/utils/lit/lit/LitConfig.py b/llvm/utils/lit/lit/LitConfig.py index 35ff0590bd6bf..881d9fa86a58a 100644 --- a/llvm/utils/lit/lit/LitConfig.py +++ b/llvm/utils/lit/lit/LitConfig.py @@ -174,10 +174,12 @@ def _write_message(self, kind, message): kind, message)) def note(self, message): - self._write_message('note', message) + if not self.quiet: + self._write_message('note', message) def warning(self, message): - self._write_message('warning', message) + if not self.quiet: + self._write_message('warning', message) self.numWarnings += 1 def error(self, message): From 54d767f508bbcbdafd57bdbfcc4036173c2708e8 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 8 Oct 2019 02:00:53 +0000 Subject: [PATCH 206/254] [sanitizer] Fix signal_trap_handler.cpp on android llvm-svn: 374010 --- 
.../TestCases/Linux/signal_trap_handler.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp index 9b4bc067e4920..f7af8aa850f0b 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp @@ -3,11 +3,15 @@ #include #include #include +#include -int handled; +int in_handler; void handler(int signo, siginfo_t *info, void *uctx) { - handled = 1; + fprintf(stderr, "in_handler: %d\n", in_handler); + fflush(stderr); + // CHECK: in_handler: 1 + _Exit(0); } int main() { @@ -21,9 +25,10 @@ int main() { assert(a.sa_sigaction == handler); assert(a.sa_flags & SA_SIGINFO); + in_handler = 1; __builtin_debugtrap(); - assert(handled); - fprintf(stderr, "HANDLED %d\n", handled); -} + in_handler = 0; -// CHECK: HANDLED 1 + fprintf(stderr, "UNREACHABLE\n"); + return 1; +} From d6609a404fecafe479be8a012b99e3f278f86275 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Tue, 8 Oct 2019 02:26:17 +0000 Subject: [PATCH 207/254] [NFC] Fix ubsan-blacklist test Restored original test and marked tests for VFS as unsupported on Windows. 
llvm-svn: 374011 --- clang/test/CodeGen/ubsan-blacklist-vfs.c | 38 ++++++++++++++++++++++++ clang/test/CodeGen/ubsan-blacklist.c | 11 ------- 2 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 clang/test/CodeGen/ubsan-blacklist-vfs.c diff --git a/clang/test/CodeGen/ubsan-blacklist-vfs.c b/clang/test/CodeGen/ubsan-blacklist-vfs.c new file mode 100644 index 0000000000000..c076eec95a4b4 --- /dev/null +++ b/clang/test/CodeGen/ubsan-blacklist-vfs.c @@ -0,0 +1,38 @@ +// UNSUPPORTED: system-windows + +// Verify ubsan doesn't emit checks for blacklisted functions and files +// RUN: echo "fun:hash" > %t-func.blacklist +// RUN: echo "src:%s" | sed -e 's/\\/\\\\/g' > %t-file.blacklist + +// RUN: rm -f %t-vfsoverlay.yaml +// RUN: rm -f %t-nonexistent.blacklist +// RUN: sed -e "s|@DIR@|%/T|g" %S/Inputs/sanitizer-blacklist-vfsoverlay.yaml | sed -e "s|@REAL_FILE@|%/t-func.blacklist|g" | sed -e "s|@NONEXISTENT_FILE@|%/t-nonexistent.blacklist|g" > %t-vfsoverlay.yaml +// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/only-virtual-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/invalid-virtual-file.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID-MAPPED-FILE +// INVALID-MAPPED-FILE: invalid-virtual-file.blacklist': No such file or directory + +// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%t-nonexistent.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID +// INVALID: nonexistent.blacklist': No such file or directory + +unsigned i; + +// DEFAULT: @hash +// FUNC: @hash +// FILE: @hash +unsigned hash() { +// DEFAULT: call {{.*}}void @__ubsan +// FUNC-NOT: call {{.*}}void @__ubsan +// FILE-NOT: call {{.*}}void @__ubsan + return i * 37; +} + +// DEFAULT: @add +// 
FUNC: @add +// FILE: @add +unsigned add() { +// DEFAULT: call {{.*}}void @__ubsan +// FUNC: call {{.*}}void @__ubsan +// FILE-NOT: call {{.*}}void @__ubsan + return i + 1; +} diff --git a/clang/test/CodeGen/ubsan-blacklist.c b/clang/test/CodeGen/ubsan-blacklist.c index 61d33c983e619..666003bd9233c 100644 --- a/clang/test/CodeGen/ubsan-blacklist.c +++ b/clang/test/CodeGen/ubsan-blacklist.c @@ -5,17 +5,6 @@ // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-func.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC // RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -fsanitize-blacklist=%t-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FILE -// RUN: rm -f %t-vfsoverlay.yaml -// RUN: rm -f %t-nonexistent.blacklist -// RUN: sed -e "s|@DIR@|%/T|g" %S/Inputs/sanitizer-blacklist-vfsoverlay.yaml | sed -e "s|@REAL_FILE@|%/t-func.blacklist|g" | sed -e "s|@NONEXISTENT_FILE@|%/t-nonexistent.blacklist|g" > %t-vfsoverlay.yaml -// RUN: %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/only-virtual-file.blacklist -emit-llvm %s -o - | FileCheck %s --check-prefix=FUNC - -// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%T/invalid-virtual-file.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID-MAPPED-FILE -// INVALID-MAPPED-FILE: invalid-virtual-file.blacklist': No such file or directory - -// RUN: not %clang_cc1 -fsanitize=unsigned-integer-overflow -ivfsoverlay %t-vfsoverlay.yaml -fsanitize-blacklist=%t-nonexistent.blacklist -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=INVALID -// INVALID: nonexistent.blacklist': No such file or directory - unsigned i; // DEFAULT: @hash From 66e276862781f6edc4e757695e20b69b3fb11d49 Mon Sep 17 00:00:00 2001 From: James Clarke Date: Tue, 8 Oct 2019 02:28:57 +0000 Subject: [PATCH 208/254] [ItaniumMangle] Fix mangling of GNU __null in an expression 
to match GCC Reviewers: rsmith Reviewed By: rsmith Subscribers: erik.pilkington, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68368 llvm-svn: 374013 --- clang/lib/AST/ItaniumMangle.cpp | 7 +++++-- clang/test/CodeGenCXX/mangle-exprs.cpp | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index bea96dec9da97..c6f7143251a6f 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4273,8 +4273,11 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::GNUNullExprClass: - // FIXME: should this really be mangled the same as nullptr? - // fallthrough + // Mangle as if an integer literal 0. + Out << 'L'; + mangleType(E->getType()); + Out << "0E"; + break; case Expr::CXXNullPtrLiteralExprClass: { Out << "LDnE"; diff --git a/clang/test/CodeGenCXX/mangle-exprs.cpp b/clang/test/CodeGenCXX/mangle-exprs.cpp index 6c46402694036..1b99272b7f22a 100644 --- a/clang/test/CodeGenCXX/mangle-exprs.cpp +++ b/clang/test/CodeGenCXX/mangle-exprs.cpp @@ -373,3 +373,19 @@ namespace designated_init { template void f(decltype(T{.a.b[3][1 ... 4] = 9}) x) {} void use_f(A a) { f(a); } } + +namespace null { + template + void cpp_nullptr(typename enable_if

::type* = 0) { + } + + template + void gnu_null(typename enable_if

::type* = 0) { + } + + // CHECK-LABEL: define {{.*}} @_ZN4null11cpp_nullptrILDn0EEEvPN9enable_ifIXeqT_LDnEEvE4typeE + template void cpp_nullptr(void *); + + // CHECK-LABEL: define {{.*}} @_ZN4null8gnu_nullILPv0EEEvPN9enable_ifIXeqT_Ll0EEvE4typeE + template void gnu_null(void *); +} From a58ddba1137dc11462955a5a4aecda112923c8a8 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 8 Oct 2019 02:50:27 +0000 Subject: [PATCH 209/254] [WebAssembly] Add REQUIRES: asserts to cfg-stackify-eh.ll This was missing in D68552. llvm-svn: 374015 --- llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index f33f56701ebb8..9d75aaa92839f 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling | FileCheck %s ; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT From 9806a1d5f90a21a16c4bfc6d4bb10e0d5b870573 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 8 Oct 2019 03:00:31 +0000 Subject: [PATCH 210/254] [ConstantRange] [NFC] replace 
addWithNoSignedWrap with addWithNoWrap. llvm-svn: 374016 --- llvm/include/llvm/IR/ConstantRange.h | 4 --- llvm/lib/IR/ConstantRange.cpp | 10 ------- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 4 +-- llvm/unittests/IR/ConstantRangeTest.cpp | 26 ------------------- 4 files changed, 2 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index ce4b851cb4572..964f9e8e9bc9c 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -338,10 +338,6 @@ class LLVM_NODISCARD ConstantRange { ConstantRange addWithNoWrap(const ConstantRange &Other, unsigned NoWrapKind, PreferredRangeType RangeType = Smallest) const; - /// Return a new range representing the possible values resulting from a - /// known NSW addition of a value in this range and \p Other constant. - ConstantRange addWithNoSignedWrap(const APInt &Other) const; - /// Return a new range representing the possible values resulting /// from a subtraction of a value in this range and a value in \p Other. ConstantRange sub(const ConstantRange &Other) const; diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index 8ea688ff1f07e..592042bc0c788 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -866,16 +866,6 @@ ConstantRange ConstantRange::addWithNoWrap(const ConstantRange &Other, return Result; } -ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const { - // Calculate the subset of this range such that "X + Other" is - // guaranteed not to wrap (overflow) for all X in this subset. 
- auto NSWRange = ConstantRange::makeExactNoWrapRegion( - BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap); - auto NSWConstrainedRange = intersectWith(NSWRange); - - return NSWConstrainedRange.add(ConstantRange(Other)); -} - ConstantRange ConstantRange::sub(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 1aaa0265bade6..c55783ac2154f 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1839,8 +1839,8 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef, auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); auto CmpConstrainedLHSRange = ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); - auto NarrowDefRange = - CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); }; diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index 7c43d2fb09cc9..58a25166d83dc 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -643,32 +643,6 @@ TEST_F(ConstantRangeTest, Add) { ConstantRange(APInt(16, 0xe))); } -TEST_F(ConstantRangeTest, AddWithNoSignedWrap) { - EXPECT_EQ(Empty.addWithNoSignedWrap(APInt(16, 1)), Empty); - EXPECT_EQ(Full.addWithNoSignedWrap(APInt(16, 1)), - ConstantRange(APInt(16, INT16_MIN+1), APInt(16, INT16_MIN))); - EXPECT_EQ(ConstantRange(APInt(8, -50), APInt(8, 50)).addWithNoSignedWrap(APInt(8, 10)), - ConstantRange(APInt(8, -40), APInt(8, 60))); - EXPECT_EQ(ConstantRange(APInt(8, -50), APInt(8, 120)).addWithNoSignedWrap(APInt(8, 10)), - ConstantRange(APInt(8, -40), APInt(8, INT8_MIN))); - EXPECT_EQ(ConstantRange(APInt(8, 120), APInt(8, 
-10)).addWithNoSignedWrap(APInt(8, 5)), - ConstantRange(APInt(8, 125), APInt(8, -5))); - EXPECT_EQ(ConstantRange(APInt(8, 120), APInt(8, -120)).addWithNoSignedWrap(APInt(8, 10)), - ConstantRange(APInt(8, INT8_MIN+10), APInt(8, -110))); - - EXPECT_EQ(Empty.addWithNoSignedWrap(APInt(16, -1)), Empty); - EXPECT_EQ(Full.addWithNoSignedWrap(APInt(16, -1)), - ConstantRange(APInt(16, INT16_MIN), APInt(16, INT16_MAX))); - EXPECT_EQ(ConstantRange(APInt(8, -50), APInt(8, 50)).addWithNoSignedWrap(APInt(8, -10)), - ConstantRange(APInt(8, -60), APInt(8, 40))); - EXPECT_EQ(ConstantRange(APInt(8, -120), APInt(8, 50)).addWithNoSignedWrap(APInt(8, -10)), - ConstantRange(APInt(8, INT8_MIN), APInt(8, 40))); - EXPECT_EQ(ConstantRange(APInt(8, 120), APInt(8, -120)).addWithNoSignedWrap(APInt(8, -5)), - ConstantRange(APInt(8, 115), APInt(8, -125))); - EXPECT_EQ(ConstantRange(APInt(8, 120), APInt(8, -120)).addWithNoSignedWrap(APInt(8, -10)), - ConstantRange(APInt(8, 110), APInt(8, INT8_MIN-10))); -} - template static void TestAddWithNoSignedWrapExhaustive(Fn1 RangeFn, Fn2 IntFn) { unsigned Bits = 4; From 9f41deccc0e648a006c9f38e11919f181b6c7e0a Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Tue, 8 Oct 2019 03:28:33 +0000 Subject: [PATCH 211/254] [LoopVectorize][PowerPC] Estimate int and float register pressure separately in loop-vectorize In loop-vectorize, interleave count and vector factor depend on target register number. Currently, it does not estimate different register pressure for different register class separately(especially for scalar type, float type should not be on the same position with int type), so it's not accurate. Specifically, it causes too many times interleaving/unrolling, result in too many register spills in loop body and hurting performance. So we need classify the register classes in IR level, and importantly these are abstract register classes, and are not the target register class of backend provided in td file. 
It's used to establish the mapping between the types of IR values and the number of simultaneous live ranges to which we'd like to limit for some set of those types. For example, POWER target, register num is special when VSX is enabled. When VSX is enabled, the number of int scalar register is 32(GPR), float is 64(VSR), but for int and float vector register both are 64(VSR). So there should be 2 kinds of register class when vsx is enabled, and 3 kinds of register class when VSX is NOT enabled. It runs on POWER target, it makes big(+~30%) performance improvement in one specific bmk(503.bwaves_r) of spec2017 and no other obvious degressions. Differential revision: https://reviews.llvm.org/D67148 llvm-svn: 374017 --- .../llvm/Analysis/TargetTransformInfo.h | 35 +++- .../llvm/Analysis/TargetTransformInfoImpl.h | 15 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 - llvm/lib/Analysis/TargetTransformInfo.cpp | 12 +- .../AArch64/AArch64TargetTransformInfo.h | 3 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 +- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 35 +++- .../Target/PowerPC/PPCTargetTransformInfo.h | 8 +- .../SystemZ/SystemZTargetTransformInfo.cpp | 3 +- .../SystemZ/SystemZTargetTransformInfo.h | 2 +- .../WebAssemblyTargetTransformInfo.cpp | 5 +- .../WebAssemblyTargetTransformInfo.h | 2 +- .../lib/Target/X86/X86TargetTransformInfo.cpp | 3 +- llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 +- .../Target/XCore/XCoreTargetTransformInfo.h | 3 +- .../Transforms/Scalar/LoopStrengthReduce.cpp | 4 +- .../Transforms/Vectorize/LoopVectorize.cpp | 151 ++++++++++----- .../Transforms/Vectorize/SLPVectorizer.cpp | 2 +- .../LoopVectorize/PowerPC/reg-usage.ll | 178 ++++++++++++++++++ .../LoopVectorize/X86/reg-usage-debug.ll | 12 +- .../Transforms/LoopVectorize/X86/reg-usage.ll | 34 +++- 21 files changed, 428 insertions(+), 86 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll diff --git 
a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 6da2d7f43bc42..abea2afe26b75 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -788,10 +788,23 @@ class TargetTransformInfo { /// Additional properties of an operand's values. enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; - /// \return The number of scalar or vector registers that the target has. - /// If 'Vectors' is true, it returns the number of vector registers. If it is - /// set to false, it returns the number of scalar registers. - unsigned getNumberOfRegisters(bool Vector) const; + /// \return the number of registers in the target-provided register class. + unsigned getNumberOfRegisters(unsigned ClassID) const; + + /// \return the target-provided register class ID for the provided type, + /// accounting for type promotion and other type-legalization techniques that the target might apply. + /// However, it specifically does not account for the scalarization or splitting of vector types. + /// Should a vector type require scalarization or splitting into multiple underlying vector registers, + /// that type should be mapped to a register class containing no registers. + /// Specifically, this is designed to provide a simple, high-level view of the register allocation + /// later performed by the backend. These register classes don't necessarily map onto the + /// register classes used by the backend. + /// FIXME: It's not currently possible to determine how many registers + /// are used by the provided type. + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + + /// \return the target-provided register class name + const char* getRegisterClassName(unsigned ClassID) const; /// \return The width of the largest scalar or vector register type. 
unsigned getRegisterBitWidth(bool Vector) const; @@ -1243,7 +1256,9 @@ class TargetTransformInfo::Concept { Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual unsigned getNumberOfRegisters(bool Vector) = 0; + virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; + virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; + virtual const char* getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; @@ -1586,8 +1601,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - unsigned getNumberOfRegisters(bool Vector) override { - return Impl.getNumberOfRegisters(Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const override { + return Impl.getNumberOfRegisters(ClassID); + } + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override { + return Impl.getRegisterClassForType(Vector, Ty); + } + const char* getRegisterClassName(unsigned ClassID) const override { + return Impl.getRegisterClassName(ClassID); } unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2f1011799f137..f850d9a7bb567 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -354,7 +354,20 @@ class TargetTransformInfoImplBase { return TTI::TCC_Free; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } + unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } + + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { + 
return Vector ? 1 : 0; + }; + + const char* getRegisterClassName(unsigned ClassID) const { + switch (ClassID) { + default: + return "Generic::Unknown Register Class"; + case 0: return "Generic::ScalarRC"; + case 1: return "Generic::VectorRC"; + } + } unsigned getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 75e0f844fd075..1cf9cc61f219e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -519,8 +519,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index f3d20ce984dbd..aa93e1d034fdb 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -466,8 +466,16 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return Cost; } -unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return TTIImpl->getNumberOfRegisters(Vector); +unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { + return TTIImpl->getNumberOfRegisters(ClassID); +} + +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { + return TTIImpl->getRegisterClassForType(Vector, Ty); +} + +const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { + return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 95cda63b01744..d5ef0e5bea3b7 100644 
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -85,7 +85,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { bool enableInterleavedAccessVectorization() { return true; } - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 32; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 47e98dac9f6bf..b878ea3a17121 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -122,7 +122,8 @@ class ARMTTIImpl : public BasicTTIImplBase { /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 16; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 40e536687014b..764335128d424 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } -unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { - if (Vector && !ST->hasAltivec() && !ST->hasQPX()) - return 0; - return ST->hasVSX() ? 64 : 32; +unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + assert(ClassID == GPRRC || ClassID == FPRRC || + ClassID == VRRC || ClassID == VSXRC); + if (ST->hasVSX()) { + assert(ClassID == GPRRC || ClassID == VSXRC); + return ClassID == GPRRC ? 32 : 64; + } + assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC); + return 32; +} + +unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { + if (Vector) + return ST->hasVSX() ? 
VSXRC : VRRC; + else if (Ty && Ty->getScalarType()->isFloatTy()) + return ST->hasVSX() ? VSXRC : FPRRC; + else + return GPRRC; +} + +const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const { + + switch (ClassID) { + default: + llvm_unreachable("unknown register class"); + return "PPC::unknown register class"; + case GPRRC: return "PPC::GPRRC"; + case FPRRC: return "PPC::FPRRC"; + case VRRC: return "PPC::VRRC"; + case VSXRC: return "PPC::VSXRC"; + } } unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 5d76ee418b694..294c970f21fb5 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -72,7 +72,13 @@ class PPCTTIImpl : public BasicTTIImplBase { TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); - unsigned getNumberOfRegisters(bool Vector); + + enum PPCRegisterClass { + GPRRC, FPRRC, VRRC, VSXRC + }; + unsigned getNumberOfRegisters(unsigned ClassID) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + const char* getRegisterClassName(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize(); unsigned getPrefetchDistance(); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 8d45e67d73c26..11c99aa111745 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -304,7 +304,8 @@ bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, C2.ScaleCost, C2.SetupCost); } -unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { +unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (!Vector) // 
Discount the stack pointer. Also leave out %r0, since it can't // be used in an address. diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 16ce2ef1d7a00..e59badeb944df 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -56,7 +56,7 @@ class SystemZTTIImpl : public BasicTTIImplBase { /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize() { return 256; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 46ef765ce0f4b..1c53e90daea7b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,10 +25,11 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { return TargetTransformInfo::PSK_FastHardware; } -unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { - unsigned Result = BaseT::getNumberOfRegisters(Vector); +unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + unsigned Result = BaseT::getNumberOfRegisters(ClassID); // For SIMD, use at least 16 registers, as a rough guess. 
+ bool Vector = (ClassID == 1); if (Vector) Result = std::max(Result, 16u); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 1b11b4b631eb9..f0ecc73e91de7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -53,7 +53,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index b634da1d51fbe..2f419b78f83cf 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -116,7 +116,8 @@ llvm::Optional X86TTIImpl::getCacheAssociativity( llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } -unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { +unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector && !ST->hasSSE1()) return 0; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 9b948dbbb4cb9..3ff1896f5052c 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -116,7 +116,7 @@ class X86TTIImpl : public BasicTTIImplBase { /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; unsigned getMaxInterleaveFactor(unsigned VF); diff --git a/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h 
b/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h index 3fecaaa597224..58df1f290ec9d 100644 --- a/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -40,7 +40,8 @@ class XCoreTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { return 0; } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 852bbefaf20b6..7f119175c4a82 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1386,7 +1386,9 @@ void Cost::RateFormula(const Formula &F, // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as // additional instruction (at least fill). - unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1; + // TODO: Need distinguish register class? + unsigned TTIRegNum = TTI->getNumberOfRegisters( + TTI->getRegisterClassForType(false, F.getType())) - 1; if (C.NumRegs > TTIRegNum) { // Cost already exceeded TTIRegNum, then only newly added register can add // new instructions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7e95038a5ebcf..11e1cd003b426 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -983,10 +983,11 @@ class LoopVectorizationCostModel { /// of a loop. struct RegisterUsage { /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - + /// The key is ClassID of target-provided register class. + SmallMapVector LoopInvariantRegs; /// Holds the maximum number of concurrent live intervals in the loop. 
- unsigned MaxLocalUsers; + /// The key is ClassID of target-provided register class. + SmallMapVector MaxLocalUsers; }; /// \return Returns information about the register usages of the loop for the @@ -4962,9 +4963,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { // Select the largest VF which doesn't require more registers than existing // ones. - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { - if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { + bool Selected = true; + for (auto& pair : RUs[i].MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + if (pair.second > TargetNumRegisters) + Selected = false; + } + if (Selected) { MaxVF = VFs[i]; break; } @@ -5115,22 +5121,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers\n"); - - if (VF == 1) { - if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumScalarRegs; - } else { - if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumVectorRegs; - } - RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. - R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); + for (auto& pair : R.MaxLocalUsers) { + pair.second = std::max(pair.second, 1U); + } // We calculate the interleave count using the following formula. 
// Subtract the number of loop invariants from the number of available @@ -5143,13 +5139,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, // We also want power of two interleave counts to ensure that the induction // variable of the vector loop wraps to zero, when tail is folded by masking; // this currently happens when OptForSize, in which case IC is set to 1 above. - unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / - R.MaxLocalUsers); + unsigned IC = UINT_MAX; - // Don't count the induction variable as interleaved. - if (EnableIndVarRegisterHeur) - IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / - std::max(1U, (R.MaxLocalUsers - 1))); + for (auto& pair : R.MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers of " + << TTI.getRegisterClassName(pair.first) << " register class\n"); + if (VF == 1) { + if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumScalarRegs; + } else { + if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumVectorRegs; + } + unsigned MaxLocalUsers = pair.second; + unsigned LoopInvariantRegs = 0; + if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) + LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; + + unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + // Don't count the induction variable as interleaved. + if (EnableIndVarRegisterHeur) { + TmpIC = + PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) / + std::max(1U, (MaxLocalUsers - 1))); + } + + IC = std::min(IC, TmpIC); + } // Clamp the interleave ranges to reasonable counts. 
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); @@ -5331,7 +5349,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { const DataLayout &DL = TheFunction->getParent()->getDataLayout(); SmallVector RUs(VFs.size()); - SmallVector MaxUsages(VFs.size(), 0); + SmallVector, 8> MaxUsages(VFs.size()); LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); @@ -5361,21 +5379,45 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { + // Count the number of live intervals. + SmallMapVector RegUsage; + if (VFs[j] == 1) { - MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); - continue; + for (auto Inst : OpenIntervals) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } + } else { + collectUniformsAndScalars(VFs[j]); + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end()) + continue; + if (isScalarAfterVectorization(Inst, VFs[j])) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } else { + unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]); + else + RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]); + } + } } - collectUniformsAndScalars(VFs[j]); - // Count the number of live intervals. - unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. 
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() || - isScalarAfterVectorization(Inst, VFs[j])) - continue; - RegUsage += GetRegUsage(Inst->getType(), VFs[j]); + + for (auto& pair : RegUsage) { + if (MaxUsages[j].find(pair.first) != MaxUsages[j].end()) + MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second); + else + MaxUsages[j][pair.first] = pair.second; } - MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " @@ -5386,18 +5428,32 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } for (unsigned i = 0, e = VFs.size(); i < e; ++i) { - unsigned Invariant = 0; - if (VFs[i] == 1) - Invariant = LoopInvariants.size(); - else { - for (auto Inst : LoopInvariants) - Invariant += GetRegUsage(Inst->getType(), VFs[i]); + SmallMapVector Invariant; + + for (auto Inst : LoopInvariants) { + unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]); + unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType()); + if (Invariant.find(ClassID) == Invariant.end()) + Invariant[ClassID] = Usage; + else + Invariant[ClassID] += Usage; } LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant - << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " + << MaxUsages[i].size() << " item\n"); + for (const auto& pair : MaxUsages[i]) { + LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) + << ", " << pair.second << " registers \n"); + } + LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " + << Invariant.size() << " item\n"); + for (const auto& pair : Invariant) { + LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) + << ", " << pair.second << " registers \n"); + } RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = 
MaxUsages[i]; @@ -7762,7 +7818,8 @@ bool LoopVectorizePass::runImpl( // The second condition is necessary because, even if the target has no // vector registers, loop vectorization may still enable scalar // interleaving. - if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) && + TTI->getMaxInterleaveFactor(1) < 2) return false; bool Changed = false; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 99428c6c5dee3..a22153bbed170 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // If the target claims to have no vector registers don't attempt // vectorization. - if (!TTI->getNumberOfRegisters(true)) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) return false; // Don't vectorize when the attribute NoImplicitFloat is used. 
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll new file mode 100644 index 0000000000000..7c48d6400eb7b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -0,0 +1,178 @@ +; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8 +; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9 + +@a = global [1024 x i8] zeroinitializer, align 16 +@b = global [1024 x i8] zeroinitializer, align 16 + +define i32 @foo() { +; +; CHECK-LABEL: foo + +; CHECK: LV(REG): VF = 8 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK: LV(REG): VF = 16 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item + +; CHECK-PWR8: LV(REG): VF = 16 +; CHECK-PWR8-NEXT: LV(REG): Found max usage: 2 item +; CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers +; CHECK-PWR8-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK-PWR8: Setting best plan to VF=16, UF=4 + +; CHECK-PWR9: LV(REG): VF = 8 +; CHECK-PWR9-NEXT: LV(REG): Found max usage: 2 item +; CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers +; CHECK-PWR9-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK-PWR9: Setting best plan to VF=8, UF=8 + + 
+entry: + br label %for.body + +for.cond.cleanup: + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv + %1 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %1 to i32 + %sub = sub nsw i32 %conv, %conv3 + %ispos = icmp sgt i32 %sub, -1 + %neg = sub nsw i32 0, %sub + %2 = select i1 %ispos, i32 %sub, i32 %neg + %add = add nsw i32 %2, %s.015 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +define i32 @goo() { +; For indvars.iv used in a computating chain only feeding into getelementptr or cmp, +; it will not have vector version and the vector register usage will not exceed the +; available vector register number. 
+; CHECK-LABEL: goo +; CHECK: LV(REG): VF = 8 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK: LV(REG): VF = 16 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK: LV(REG): VF = 16 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item + +; CHECK: Setting best plan to VF=16, UF=4 + +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %tmp1 = add nsw i64 %indvars.iv, 3 + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1 + %tmp = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %tmp to i32 + %tmp2 = add nsw i64 %indvars.iv, 2 + %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2 + %tmp3 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %tmp3 to i32 + %sub = sub nsw i32 %conv, %conv3 + %ispos = icmp sgt i32 %sub, -1 + %neg = sub nsw i32 0, %sub + %tmp4 = select i1 %ispos, i32 %sub, i32 %neg + %add = add nsw i32 %tmp4, %s.015 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +define i64 @bar(i64* nocapture %a) { +; CHECK-LABEL: bar +; CHECK: LV(REG): VF = 2 +; CHECK-NEXT: LV(REG): Found max usage: 2 item 
+; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item + +; CHECK: Setting best plan to VF=2, UF=12 + +entry: + br label %for.body + +for.cond.cleanup: + %add2.lcssa = phi i64 [ %add2, %for.body ] + ret i64 %add2.lcssa + +for.body: + %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ] + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012 + %0 = load i64, i64* %arrayidx, align 8 + %add = add nsw i64 %0, %i.012 + store i64 %add, i64* %arrayidx, align 8 + %add2 = add nsw i64 %add, %s.011 + %inc = add nuw nsw i64 %i.012, 1 + %exitcond = icmp eq i64 %inc, 1024 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +@d = external global [0 x i64], align 8 +@e = external global [0 x i32], align 4 +@c = external global [0 x i32], align 4 + +define void @hoo(i32 %n) { +; CHECK-LABEL: hoo +; CHECK: LV(REG): VF = 4 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 2 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK: LV(REG): VF = 1 +; CHECK-NEXT: LV(REG): Found max usage: 1 item +; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item +; CHECK: Setting best plan to VF=1, UF=12 + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv + %tmp = load i64, i64* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp + %tmp1 = load i32, i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv + store i32 %tmp1, i32* %arrayidx3, 
align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll index 8205092deffa2..b6254a4f8aa04 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll @@ -22,7 +22,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; CHECK: LV: Checking a loop in "test_g" -; CHECK: LV(REG): Found max usage: 2 +; CHECK: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 1 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers define i32 @test_g(i32* nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 { entry: @@ -60,7 +64,11 @@ for.end: ; preds = %for.end.loopexit, % } ; CHECK: LV: Checking a loop in "test" -; CHECK: LV(REG): Found max usage: 2 +; CHECK: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 1 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers define i32 @test(i32* nocapture readonly %a, i32 %n) local_unnamed_addr { entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll index 9b276aa2bd7df..cae9360e06133 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll @@ -11,9 +11,15 @@ define i32 @foo() { ; ; CHECK-LABEL: foo ; CHECK: LV(REG): VF = 8 -; 
CHECK-NEXT: LV(REG): Found max usage: 7 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item ; CHECK: LV(REG): VF = 16 -; CHECK-NEXT: LV(REG): Found max usage: 13 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item entry: br label %for.body @@ -47,9 +53,15 @@ define i32 @goo() { ; available vector register number. ; CHECK-LABEL: goo ; CHECK: LV(REG): VF = 8 -; CHECK-NEXT: LV(REG): Found max usage: 7 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item ; CHECK: LV(REG): VF = 16 -; CHECK-NEXT: LV(REG): Found max usage: 13 +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item entry: br label %for.body @@ -81,8 +93,11 @@ for.body: ; preds = %for.body, %entry define i64 @bar(i64* nocapture %a) { ; CHECK-LABEL: bar ; CHECK: LV(REG): VF = 2 -; CHECK: LV(REG): Found max usage: 3 -; +; CHECK-NEXT: LV(REG): Found max usage: 2 item +; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 3 registers +; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers +; CHECK-NEXT: LV(REG): Found invariant usage: 0 item + entry: br label %for.body @@ -113,8 +128,11 @@ define void @hoo(i32 %n) { ; so the max usage of AVX512 vector register will be 2. 
; AVX512F-LABEL: bar ; AVX512F: LV(REG): VF = 16 -; AVX512F: LV(REG): Found max usage: 2 -; +; AVX512F-CHECK: LV(REG): Found max usage: 2 item +; AVX512F-CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers +; AVX512F-CHECK: LV(REG): RegisterClass: Generic::VectorRC, 2 registers +; AVX512F-CHECK: LV(REG): Found invariant usage: 0 item + entry: br label %for.body From 411f1885b655ea622fe124a87a6eadfd988d7a5e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 8 Oct 2019 04:39:52 +0000 Subject: [PATCH 212/254] [IA] Recognize hexadecimal escape sequences Summary: Implement support for hexadecimal escape sequences to match how GNU 'as' handles them. I.e., read all hexadecimal characters and truncate to the lower 16 bits. Reviewers: nickdesaulniers, jcai19 Subscribers: llvm-commits, hiraditya Tags: #llvm Differential Revision: https://reviews.llvm.org/D68598 llvm-svn: 374018 --- llvm/lib/MC/MCParser/AsmParser.cpp | 18 +++++++++++++++++- llvm/test/MC/AsmParser/directive_ascii.s | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 381bf96416166..ca6bc252a0df7 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2914,11 +2914,27 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + // loosely follows Darwin 'as'. ++i; if (i == e) return TokError("unexpected backslash at end of string"); + // Recognize hex sequences similarly to GNU 'as'. + if (Str[i] == 'x' || Str[i] == 'X') { + size_t length = Str.size(); + if (i + 1 >= length || !isHexDigit(Str[i + 1])) + return TokError("invalid hexadecimal escape sequence"); + + // Consume hex characters. GNU 'as' reads all hexadecimal characters and + // then truncates to the lower 16 bits. Seems reasonable. 
+ unsigned Value = 0; + while (i + 1 < length && isHexDigit(Str[i + 1])) + Value = Value * 16 + hexDigitValue(Str[++i]); + + Data += (unsigned char)(Value & 0xFF); + continue; + } + // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. diff --git a/llvm/test/MC/AsmParser/directive_ascii.s b/llvm/test/MC/AsmParser/directive_ascii.s index a7ba7bbd5da13..604f9721bcca9 100644 --- a/llvm/test/MC/AsmParser/directive_ascii.s +++ b/llvm/test/MC/AsmParser/directive_ascii.s @@ -39,3 +39,8 @@ TEST5: # CHECK: .byte 0 TEST6: .string "B", "C" + +# CHECK: TEST7: +# CHECK: .ascii "dk" +TEST7: + .ascii "\x64\Xa6B" From f1ac8151f9cd9a7380b877e4e57213aa9b995e05 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 8 Oct 2019 07:08:48 +0000 Subject: [PATCH 213/254] [llvm-exegesis] Add stabilization test with config In preparation for D68629. llvm-svn: 374020 --- ...analysis-cluster-stabilization-config.test | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test new file mode 100644 index 0000000000000..0403af4a229a5 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test @@ -0,0 +1,43 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-display-unstable-clusters -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-UNSTABLE %s + +# We have two measurements with different measurements for SQRTSSr, but they +# have different configs, so they should not be placed in the same cluster by +# stabilization. 
+ +# CHECK-UNSTABLE: SQRTSSr +# CHECK-UNSTABLE: SQRTSSr + +--- +mode: latency +key: + instructions: + - 'SQRTSSr XMM11 XMM11' + config: 'config1' + register_initial_values: + - 'XMM11=0x0' +cpu_name: bdver2 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: latency, value: 90.1111, per_snippet_value: 90.1111 } +error: '' +info: Repeating a single explicitly serial instruction +assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3 +... +--- +mode: latency +key: + instructions: + - 'SQRTSSr XMM11 XMM11' + config: 'config2' + register_initial_values: + - 'XMM11=0x0' +cpu_name: bdver2 +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: latency, value: 100, per_snippet_value: 100 } +error: '' +info: Repeating a single explicitly serial instruction +assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C410F3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBF3450F51DBC3 +... 
From 18b6fe07bcf44294f200bd2b526cb737ed275c04 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 8 Oct 2019 07:39:50 +0000 Subject: [PATCH 214/254] [LoopVectorize] Fix non-debug builds after rL374017 llvm-svn: 374021 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 11e1cd003b426..18cc61f7a2b79 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5442,17 +5442,19 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i].size() << " item\n"); - for (const auto& pair : MaxUsages[i]) { + for (const auto& Pair : MaxUsages[i]) { + (void)Pair; LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " - << TTI.getRegisterClassName(pair.first) - << ", " << pair.second << " registers \n"); + << TTI.getRegisterClassName(Pair.first) + << ", " << Pair.second << " registers \n"); } LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() << " item\n"); - for (const auto& pair : Invariant) { + for (const auto& Pair : Invariant) { + (void)Pair; LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " - << TTI.getRegisterClassName(pair.first) - << ", " << pair.second << " registers \n"); + << TTI.getRegisterClassName(Pair.first) + << ", " << Pair.second << " registers \n"); } RU.LoopInvariantRegs = Invariant; From 54933667296d687743e8bd44048389b01f2cb94b Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 8 Oct 2019 08:03:40 +0000 Subject: [PATCH 215/254] Report error if -export-dynamic is used with -r The combination of the two flags doesn't make sense. 
And other linkers seem to just ignore --export-dynamic if --relocatable is given, but we probably should report it as an error to let users know that is an invalid combination. Fixes https://bugs.llvm.org/show_bug.cgi?id=43552 Differential Revision: https://reviews.llvm.org/D68441 llvm-svn: 374022 --- lld/ELF/Driver.cpp | 2 ++ lld/test/ELF/driver.test | 4 ++++ lld/test/ELF/lto/relocation-model.ll | 6 ------ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index fbdf28b1a2029..616718b69d813 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -334,6 +334,8 @@ static void checkOptions() { error("-r and --icf may not be used together"); if (config->pie) error("-r and -pie may not be used together"); + if (config->exportDynamic) + error("-r and --export-dynamic may not be used together"); } if (config->executeOnly) { diff --git a/lld/test/ELF/driver.test b/lld/test/ELF/driver.test index 77f9828a0357e..c6fbacf0bea2f 100644 --- a/lld/test/ELF/driver.test +++ b/lld/test/ELF/driver.test @@ -72,6 +72,10 @@ # RUN: not ld.lld %t -z max-page-size 2>&1 | FileCheck -check-prefix=ERR11 %s # ERR11: unknown -z value: max-page-size +## Attempt to use -r and --export-dynamic together +# RUN: not ld.lld -r -export-dynamic %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR12 %s +# ERR12: -r and --export-dynamic may not be used together + .globl _start _start: nop diff --git a/lld/test/ELF/lto/relocation-model.ll b/lld/test/ELF/lto/relocation-model.ll index beb693ebeab50..45e21db64187f 100644 --- a/lld/test/ELF/lto/relocation-model.ll +++ b/lld/test/ELF/lto/relocation-model.ll @@ -14,9 +14,6 @@ ; RUN: ld.lld %t.o -o %t-out -save-temps --export-dynamic --noinhibit-exec ; RUN: llvm-readobj -r %t-out.lto.o | FileCheck %s --check-prefix=STATIC -; RUN: ld.lld %t.o -o %t-out -save-temps -r --export-dynamic -; RUN: llvm-readobj -r %t-out.lto.o | FileCheck %s --check-prefix=STATIC - ;; PIC source. 
@@ -29,9 +26,6 @@ ; RUN: ld.lld %t.pic.o -o %t-out -save-temps --export-dynamic --noinhibit-exec ; RUN: llvm-readobj -r %t-out.lto.o | FileCheck %s --check-prefix=STATIC -; RUN: ld.lld %t.pic.o -o %t-out -save-temps -r --export-dynamic -; RUN: llvm-readobj -r %t-out.lto.o | FileCheck %s --check-prefix=PIC - ;; Explicit flag. From d2e9dd3877e903812ed5568e3a59e9e124ca4f85 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 8 Oct 2019 08:03:44 +0000 Subject: [PATCH 216/254] Use /dev/null for tests that we do not need outputs llvm-svn: 374023 --- lld/test/ELF/driver.test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lld/test/ELF/driver.test b/lld/test/ELF/driver.test index c6fbacf0bea2f..bec8301bb9a04 100644 --- a/lld/test/ELF/driver.test +++ b/lld/test/ELF/driver.test @@ -27,31 +27,31 @@ ## Attempt to link DSO with -r # RUN: ld.lld -shared %t -o %t.so -# RUN: not ld.lld -r %t.so %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not ld.lld -r %t.so %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s # ERR: attempted static link of dynamic object ## Attempt to use -r and -shared together -# RUN: not ld.lld -r -shared %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR2 %s +# RUN: not ld.lld -r -shared %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR2 %s # ERR2: -r and -shared may not be used together ## Attempt to use -r and --gc-sections together -# RUN: not ld.lld -r --gc-sections %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR3 %s +# RUN: not ld.lld -r --gc-sections %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR3 %s # ERR3: -r and --gc-sections may not be used together ## Attempt to use -r and --gdb-index together -# RUN: not ld.lld -r --gdb-index %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR4 %s +# RUN: not ld.lld -r --gdb-index %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR4 %s # ERR4: -r and --gdb-index may not be used together ## Attempt to use -r and --icf together -# RUN: not ld.lld -r --icf=all %t -o 
%tfail 2>&1 | FileCheck -check-prefix=ERR5 %s +# RUN: not ld.lld -r --icf=all %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR5 %s # ERR5: -r and --icf may not be used together ## Attempt to use -r and -pie together -# RUN: not ld.lld -r -pie %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR6 %s +# RUN: not ld.lld -r -pie %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR6 %s # ERR6: -r and -pie may not be used together ## Attempt to use -shared and -pie together -# RUN: not ld.lld -shared -pie %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR7 %s +# RUN: not ld.lld -shared -pie %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR7 %s # ERR7: -shared and -pie may not be used together ## "--output=foo" is equivalent to "-o foo". @@ -73,7 +73,7 @@ # ERR11: unknown -z value: max-page-size ## Attempt to use -r and --export-dynamic together -# RUN: not ld.lld -r -export-dynamic %t -o %tfail 2>&1 | FileCheck -check-prefix=ERR12 %s +# RUN: not ld.lld -r -export-dynamic %t -o /dev/null 2>&1 | FileCheck -check-prefix=ERR12 %s # ERR12: -r and --export-dynamic may not be used together .globl _start From c9ddda84052659698b921e6c3a5bf7df9df599ce Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Tue, 8 Oct 2019 08:21:20 +0000 Subject: [PATCH 217/254] [Tools] Mark output of tools as text if it is text Several LLVM tools write text files/streams without using OF_Text. This can cause problems on platforms which distinguish between text and binary output. This PR adds the OF_Text flag for the following tools: - llvm-dis - llvm-dwarfdump - llvm-mca - llvm-mc (assembler files only) - opt (assembler files only) - RemarkStreamer (used e.g. 
by opt) Reviewers: rnk, vivekvpandya, Bigcheese, andreadb Differential Revision: https://reviews.llvm.org/D67696 llvm-svn: 374024 --- llvm/lib/IR/RemarkStreamer.cpp | 12 +++++++----- llvm/tools/llvm-dis/llvm-dis.cpp | 2 +- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 2 +- llvm/tools/llvm-mc/llvm-mc.cpp | 11 +++++++---- llvm/tools/llvm-mca/llvm-mca.cpp | 2 +- llvm/tools/opt/opt.cpp | 4 +++- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp index 8a70862de726f..0fcc06b961f30 100644 --- a/llvm/lib/IR/RemarkStreamer.cpp +++ b/llvm/lib/IR/RemarkStreamer.cpp @@ -122,18 +122,20 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, if (RemarksFilename.empty()) return nullptr; + Expected Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error(std::move(E)); + std::error_code EC; + auto Flags = *Format == remarks::Format::YAML ? sys::fs::OF_Text + : sys::fs::OF_None; auto RemarksFile = - std::make_unique(RemarksFilename, EC, sys::fs::OF_None); + std::make_unique(RemarksFilename, EC, Flags); // We don't use llvm::FileError here because some diagnostics want the file // name separately. 
if (EC) return make_error(errorCodeToError(EC)); - Expected Format = remarks::parseFormat(RemarksFormat); - if (Error E = Format.takeError()) - return make_error(std::move(E)); - Expected> RemarkSerializer = remarks::createRemarkSerializer( *Format, remarks::SerializerMode::Separate, RemarksFile->os()); diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index ae3295171d1c3..d66299cbf767c 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -186,7 +186,7 @@ int main(int argc, char **argv) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_Text)); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 05a7aef67eced..e20f6041f98d8 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -584,7 +584,7 @@ int main(int argc, char **argv) { } std::error_code EC; - ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None); + ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_Text); error("Unable to open output file" + OutputFilename, EC); // Don't remove output file if we exit with an error. 
OutputFile.keep(); diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 832c46683b8cc..97d507028c1ba 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -209,9 +209,10 @@ static const Target *GetTarget(const char *ProgName) { return TheTarget; } -static std::unique_ptr GetOutputStream(StringRef Path) { +static std::unique_ptr GetOutputStream(StringRef Path, + sys::fs::OpenFlags Flags) { std::error_code EC; - auto Out = std::make_unique(Path, EC, sys::fs::OF_None); + auto Out = std::make_unique(Path, EC, Flags); if (EC) { WithColor::error() << EC.message() << '\n'; return nullptr; @@ -413,7 +414,9 @@ int main(int argc, char **argv) { FeaturesStr = Features.getString(); } - std::unique_ptr Out = GetOutputStream(OutputFilename); + sys::fs::OpenFlags Flags = (FileType == OFT_AssemblyFile) ? sys::fs::OF_Text + : sys::fs::OF_None; + std::unique_ptr Out = GetOutputStream(OutputFilename, Flags); if (!Out) return 1; @@ -423,7 +426,7 @@ int main(int argc, char **argv) { WithColor::error() << "dwo output only supported with object files\n"; return 1; } - DwoOut = GetOutputStream(SplitDwarfFile); + DwoOut = GetOutputStream(SplitDwarfFile, sys::fs::OF_None); if (!DwoOut) return 1; } diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 291ed540a8de8..99c45eebdd88e 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -238,7 +238,7 @@ ErrorOr> getOutputStream() { OutputFilename = "-"; std::error_code EC; auto Out = - std::make_unique(OutputFilename, EC, sys::fs::OF_None); + std::make_unique(OutputFilename, EC, sys::fs::OF_Text); if (!EC) return std::move(Out); return EC; diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 638b29eca2a2a..15495a511d063 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -611,7 +611,9 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out.reset(new 
ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); + sys::fs::OpenFlags Flags = OutputAssembly ? sys::fs::OF_Text + : sys::fs::OF_None; + Out.reset(new ToolOutputFile(OutputFilename, EC, Flags)); if (EC) { errs() << EC.message() << '\n'; return 1; From 78bfe3ab9475776ae72ca7c9446066f6eb816cc0 Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Tue, 8 Oct 2019 08:25:42 +0000 Subject: [PATCH 218/254] [ARM] Generate vcmp instead of vcmpe Based on the discussion in http://lists.llvm.org/pipermail/llvm-dev/2019-October/135574.html, the conclusion was reached that the ARM backend should produce vcmp instead of vcmpe instructions by default, i.e. not be producing an Invalid Operation exception when either arguments in a floating point compare are quiet NaNs. In the future, after constrained floating point intrinsics for floating point compare have been introduced, vcmpe instructions probably should be produced for those intrinsics - depending on the exact semantics they'll be defined to have. This patch logically consists of the following parts: - Revert http://llvm.org/viewvc/llvm-project?rev=294945&view=rev and http://llvm.org/viewvc/llvm-project?rev=294968&view=rev, which implemented fine-tuning for when to produce vcmpe (i.e. not do it for equality comparisons). The complexity introduced by those patches isn't needed anymore if we just always produce vcmp instead. Maybe these patches need to be reintroduced again once support is needed to map potential LLVM-IR constrained floating point compare intrinsics to the ARM instruction set. - Simply select vcmp, instead of vcmpe, see simple changes in lib/Target/ARM/ARMInstrVFP.td - Adapt lots of tests that tested for vcmpe (instead of vcmp). For all of these test, the intent of what is tested for isn't related to whether the vcmp should produce an Invalid Operation exception or not. Fixes PR43374. 
Differential Revision: https://reviews.llvm.org/D68463 llvm-svn: 374025 --- llvm/lib/Target/ARM/ARMFastISel.cpp | 22 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 51 ++-- llvm/lib/Target/ARM/ARMISelLowering.h | 2 +- llvm/lib/Target/ARM/ARMInstrInfo.td | 2 - llvm/lib/Target/ARM/ARMInstrVFP.td | 28 +- .../CodeGen/ARM/2009-07-18-RewriterBug.ll | 26 +- llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll | 4 +- llvm/test/CodeGen/ARM/compare-call.ll | 2 +- llvm/test/CodeGen/ARM/fcmp-xo.ll | 12 +- llvm/test/CodeGen/ARM/float-helpers.s | 40 +-- llvm/test/CodeGen/ARM/fp16-instructions.ll | 64 ++--- llvm/test/CodeGen/ARM/fp16-promote.ll | 2 +- llvm/test/CodeGen/ARM/fpcmp.ll | 10 +- llvm/test/CodeGen/ARM/ifcvt11.ll | 6 +- llvm/test/CodeGen/ARM/swifterror.ll | 2 +- llvm/test/CodeGen/ARM/vcmp-crash.ll | 11 - llvm/test/CodeGen/ARM/vfp.ll | 2 +- llvm/test/CodeGen/ARM/vsel-fp16.ll | 40 +-- llvm/test/CodeGen/ARM/vsel.ll | 80 +++--- llvm/test/CodeGen/Thumb2/float-cmp.ll | 40 +-- llvm/test/CodeGen/Thumb2/mve-vcmpf.ll | 240 +++++++++--------- llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll | 240 +++++++++--------- llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll | 240 +++++++++--------- 23 files changed, 561 insertions(+), 605 deletions(-) delete mode 100644 llvm/test/CodeGen/ARM/vcmp-crash.ll diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 2fd11426c5a62..3e3745f129c32 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -191,7 +191,7 @@ class ARMFastISel final : public FastISel { bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality); + bool isZExt); bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); @@ -1259,8 +1259,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the 
compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; @@ -1349,7 +1348,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality) { + bool isZExt) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; @@ -1397,19 +1396,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; - else - CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; + CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; break; case MVT::f64: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; - else - CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; + CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; break; case MVT::i1: case MVT::i8: @@ -1485,8 +1476,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Now set a register based on the comparison. 
Explicitly set the predicates diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 45bf676338228..ec5537087985d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1793,34 +1793,22 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, - ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { + ARMCC::CondCodes &CondCode2) { CondCode2 = ARMCC::AL; - InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: - case ISD::SETOEQ: - CondCode = ARMCC::EQ; - InvalidOnQNaN = false; - break; + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; - case ISD::SETONE: - CondCode = ARMCC::MI; - CondCode2 = ARMCC::GT; - InvalidOnQNaN = false; - break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; - case ISD::SETUEQ: - CondCode = ARMCC::EQ; - CondCode2 = ARMCC::VS; - InvalidOnQNaN = false; - break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: @@ -1828,10 +1816,7 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: - case ISD::SETUNE: - CondCode = ARMCC::NE; - InvalidOnQNaN = false; - break; + case ISD::SETUNE: CondCode = ARMCC::NE; break; } } @@ -4259,15 +4244,13 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode 
CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl, - bool InvalidOnQNaN) const { + SelectionDAG &DAG, const SDLoc &dl) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); SDValue Cmp; - SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -4284,12 +4267,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1), Cmp.getOperand(2)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } @@ -4929,8 +4910,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); // Normalize the fp compare. 
If RHS is zero we prefer to keep it there so we // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we @@ -4955,13 +4935,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; @@ -5188,11 +5168,10 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index b8ce4d65f757e..a89ef250c0e82 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -818,7 +818,7 @@ class VectorType; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl, bool InvalidOnQNaN) const; + const SDLoc &dl) const; SDValue duplicateCmp(SDValue Cmp, 
SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index e25260d8b4750..f75343675dad0 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -51,8 +51,6 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, SDTCisVT<2, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index d3380ab7cef77..fdd961bfbb2f7 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -19,7 +19,7 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; @@ -548,12 +548,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, 
(f64 DPR:$Dm), (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -562,17 +562,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -581,7 +581,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 0))]>; + [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -611,7 +611,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -619,7 +619,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -631,7 +631,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -639,7 +639,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> { + [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -647,7 +647,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -659,7 +659,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, def 
VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 HPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } diff --git a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll index 10b5ae4e237fa..9eae0d75e8724 100644 --- a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll +++ b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll @@ -1317,19 +1317,19 @@ bb15: } ; CHECK-LABEL: _build_delaunay: -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe -; CHECK: vcmpe +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp +; CHECK: vcmp declare i32 @puts(i8* nocapture) nounwind diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll index 4b043362afaf8..99936cd7eef99 100644 --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1781,7 +1781,7 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; ARM-NEXT: vmov.f32 s0, #1.000000e+00 ; ARM-NEXT: vmov.f64 d16, #1.000000e+00 ; ARM-NEXT: vadd.f64 d16, d9, d16 -; ARM-NEXT: vcmpe.f32 s16, s0 +; ARM-NEXT: vcmp.f32 s16, s0 ; ARM-NEXT: vmrs APSR_nzcv, fpscr ; ARM-NEXT: vmov d17, r0, r1 ; ARM-NEXT: vmov.f64 d18, d9 @@ -1828,7 +1828,7 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; THUMB-NEXT: vmov.f32 s0, #1.000000e+00 ; THUMB-NEXT: vmov.f64 d16, #1.000000e+00 ; THUMB-NEXT: vmov.f64 d18, d9 -; THUMB-NEXT: vcmpe.f32 s16, s0 +; THUMB-NEXT: vcmp.f32 s16, s0 ; THUMB-NEXT: vadd.f64 d16, d9, d16 ; THUMB-NEXT: vmrs APSR_nzcv, fpscr ; THUMB-NEXT: it gt diff --git 
a/llvm/test/CodeGen/ARM/compare-call.ll b/llvm/test/CodeGen/ARM/compare-call.ll index f45ed73adb714..47f20a28b8ac3 100644 --- a/llvm/test/CodeGen/ARM/compare-call.ll +++ b/llvm/test/CodeGen/ARM/compare-call.ll @@ -18,5 +18,5 @@ UnifiedReturnBlock: ; preds = %entry declare i32 @bar(...) -; CHECK: vcmpe.f32 +; CHECK: vcmp.f32 diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll index 8ff3b9017a5ef..3d5972f065859 100644 --- a/llvm/test/CodeGen/ARM/fcmp-xo.ll +++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll @@ -5,7 +5,7 @@ define arm_aapcs_vfpcc float @foo0(float %a0) local_unnamed_addr { ; CHECK-LABEL: foo0: ; CHECK: @ %bb.0: -; CHECK-NEXT: vcmpe.f32 s0, #0 +; CHECK-NEXT: vcmp.f32 s0, #0 ; CHECK-NEXT: vmov.f32 s2, #5.000000e-01 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmov.f32 s4, #-5.000000e-01 @@ -24,7 +24,7 @@ define arm_aapcs_vfpcc float @float1(float %a0) local_unnamed_addr { ; CHECK-NEXT: vmov.f32 s2, #1.000000e+00 ; CHECK-NEXT: vmov.f32 s4, #5.000000e-01 ; CHECK-NEXT: vmov.f32 s6, #-5.000000e-01 -; CHECK-NEXT: vcmpe.f32 s2, s0 +; CHECK-NEXT: vcmp.f32 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f32 s0, s6, s4 ; CHECK-NEXT: bx lr @@ -46,7 +46,7 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr { ; VMOVSR-NEXT: vmov.f32 s4, #5.000000e-01 ; VMOVSR-NEXT: vmov s2, r0 ; VMOVSR-NEXT: vmov.f32 s6, #-5.000000e-01 -; VMOVSR-NEXT: vcmpe.f32 s2, s0 +; VMOVSR-NEXT: vcmp.f32 s2, s0 ; VMOVSR-NEXT: vmrs APSR_nzcv, fpscr ; VMOVSR-NEXT: vselgt.f32 s0, s6, s4 ; VMOVSR-NEXT: bx lr @@ -57,7 +57,7 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr { ; NEON-NEXT: vmov.f32 s2, #5.000000e-01 ; NEON-NEXT: vmov d3, r0, r0 ; NEON-NEXT: vmov.f32 s4, #-5.000000e-01 -; NEON-NEXT: vcmpe.f32 s6, s0 +; NEON-NEXT: vcmp.f32 s6, s0 ; NEON-NEXT: vmrs APSR_nzcv, fpscr ; NEON-NEXT: vselgt.f32 s0, s4, s2 ; NEON-NEXT: bx lr @@ -70,7 +70,7 @@ define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr { ; 
CHECK-LABEL: double1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.f64 d18, #1.000000e+00 -; CHECK-NEXT: vcmpe.f64 d18, d0 +; CHECK-NEXT: vcmp.f64 d18, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmov.f64 d16, #5.000000e-01 ; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01 @@ -89,7 +89,7 @@ define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr { ; CHECK-NEXT: movt r0, #16480 ; CHECK-NEXT: vmov.f64 d16, #5.000000e-01 ; CHECK-NEXT: vmov d18, r1, r0 -; CHECK-NEXT: vcmpe.f64 d18, d0 +; CHECK-NEXT: vcmp.f64 d18, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01 ; CHECK-NEXT: vselgt.f64 d0, d17, d16 diff --git a/llvm/test/CodeGen/ARM/float-helpers.s b/llvm/test/CodeGen/ARM/float-helpers.s index d5388a372b887..1225b4c999f16 100644 --- a/llvm/test/CodeGen/ARM/float-helpers.s +++ b/llvm/test/CodeGen/ARM/float-helpers.s @@ -174,13 +174,13 @@ define i32 @fcmplt(float %a, float %b) #0 { ; CHECK-SOFTFP: vmov s2, r0 ; CHECK-SOFTFP-NEXT: mov r0, #0 ; CHECK-SOFTFP-NEXT: vmov s0, r1 -; CHECK-SOFTFP-NEXT: vcmpe.f32 s2, s0 +; CHECK-SOFTFP-NEXT: vcmp.f32 s2, s0 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movmi r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-SP-LABEL: fcmplt: -; CHECK-HARDFP-SP: vcmpe.f32 s0, s1 +; CHECK-HARDFP-SP: vcmp.f32 s0, s1 ; CHECK-HARDFP-SP-NEXT: mov r0, #0 ; CHECK-HARDFP-SP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-SP-NEXT: movmi r0, #1 @@ -205,13 +205,13 @@ define i32 @fcmple(float %a, float %b) #0 { ; CHECK-SOFTFP: vmov s2, r0 ; CHECK-SOFTFP-NEXT: mov r0, #0 ; CHECK-SOFTFP-NEXT: vmov s0, r1 -; CHECK-SOFTFP-NEXT: vcmpe.f32 s2, s0 +; CHECK-SOFTFP-NEXT: vcmp.f32 s2, s0 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movls r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-SP-LABEL: fcmple: -; CHECK-HARDFP-SP: vcmpe.f32 s0, s1 +; CHECK-HARDFP-SP: vcmp.f32 s0, s1 ; CHECK-HARDFP-SP-NEXT: mov r0, #0 ; CHECK-HARDFP-SP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-SP-NEXT: 
movls r0, #1 @@ -236,13 +236,13 @@ define i32 @fcmpge(float %a, float %b) #0 { ; CHECK-SOFTFP: vmov s2, r0 ; CHECK-SOFTFP-NEXT: mov r0, #0 ; CHECK-SOFTFP-NEXT: vmov s0, r1 -; CHECK-SOFTFP-NEXT: vcmpe.f32 s2, s0 +; CHECK-SOFTFP-NEXT: vcmp.f32 s2, s0 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movge r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-SP-LABEL: fcmpge: -; CHECK-HARDFP-SP: vcmpe.f32 s0, s1 +; CHECK-HARDFP-SP: vcmp.f32 s0, s1 ; CHECK-HARDFP-SP-NEXT: mov r0, #0 ; CHECK-HARDFP-SP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-SP-NEXT: movge r0, #1 @@ -267,13 +267,13 @@ define i32 @fcmpgt(float %a, float %b) #0 { ; CHECK-SOFTFP: vmov s2, r0 ; CHECK-SOFTFP-NEXT: mov r0, #0 ; CHECK-SOFTFP-NEXT: vmov s0, r1 -; CHECK-SOFTFP-NEXT: vcmpe.f32 s2, s0 +; CHECK-SOFTFP-NEXT: vcmp.f32 s2, s0 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movgt r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-SP-LABEL: fcmpgt: -; CHECK-HARDFP-SP: vcmpe.f32 s0, s1 +; CHECK-HARDFP-SP: vcmp.f32 s0, s1 ; CHECK-HARDFP-SP-NEXT: mov r0, #0 ; CHECK-HARDFP-SP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-SP-NEXT: movgt r0, #1 @@ -298,13 +298,13 @@ define i32 @fcmpun(float %a, float %b) #0 { ; CHECK-SOFTFP: vmov s2, r0 ; CHECK-SOFTFP-NEXT: mov r0, #0 ; CHECK-SOFTFP-NEXT: vmov s0, r1 -; CHECK-SOFTFP-NEXT: vcmpe.f32 s2, s0 +; CHECK-SOFTFP-NEXT: vcmp.f32 s2, s0 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movvs r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-SP-LABEL: fcmpun: -; CHECK-HARDFP-SP: vcmpe.f32 s0, s1 +; CHECK-HARDFP-SP: vcmp.f32 s0, s1 ; CHECK-HARDFP-SP-NEXT: mov r0, #0 ; CHECK-HARDFP-SP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-SP-NEXT: movvs r0, #1 @@ -503,13 +503,13 @@ define i32 @dcmplt(double %a, double %b) #0 { ; CHECK-SOFTFP: vmov d16, r2, r3 ; CHECK-SOFTFP-NEXT: vmov d17, r0, r1 ; CHECK-SOFTFP-NEXT: mov r0, #0 -; CHECK-SOFTFP-NEXT: vcmpe.f64 d17, d16 +; CHECK-SOFTFP-NEXT: vcmp.f64 d17, d16 ; CHECK-SOFTFP-NEXT: 
vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movmi r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-DP-LABEL: dcmplt: -; CHECK-HARDFP-DP: vcmpe.f64 d0, d1 +; CHECK-HARDFP-DP: vcmp.f64 d0, d1 ; CHECK-HARDFP-DP-NEXT: mov r0, #0 ; CHECK-HARDFP-DP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-DP-NEXT: movmi r0, #1 @@ -545,13 +545,13 @@ define i32 @dcmple(double %a, double %b) #0 { ; CHECK-SOFTFP: vmov d16, r2, r3 ; CHECK-SOFTFP-NEXT: vmov d17, r0, r1 ; CHECK-SOFTFP-NEXT: mov r0, #0 -; CHECK-SOFTFP-NEXT: vcmpe.f64 d17, d16 +; CHECK-SOFTFP-NEXT: vcmp.f64 d17, d16 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movls r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-DP-LABEL: dcmple: -; CHECK-HARDFP-DP: vcmpe.f64 d0, d1 +; CHECK-HARDFP-DP: vcmp.f64 d0, d1 ; CHECK-HARDFP-DP-NEXT: mov r0, #0 ; CHECK-HARDFP-DP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-DP-NEXT: movls r0, #1 @@ -587,13 +587,13 @@ define i32 @dcmpge(double %a, double %b) #0 { ; CHECK-SOFTFP: vmov d16, r2, r3 ; CHECK-SOFTFP-NEXT: vmov d17, r0, r1 ; CHECK-SOFTFP-NEXT: mov r0, #0 -; CHECK-SOFTFP-NEXT: vcmpe.f64 d17, d16 +; CHECK-SOFTFP-NEXT: vcmp.f64 d17, d16 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movge r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-DP-LABEL: dcmpge: -; CHECK-HARDFP-DP: vcmpe.f64 d0, d1 +; CHECK-HARDFP-DP: vcmp.f64 d0, d1 ; CHECK-HARDFP-DP-NEXT: mov r0, #0 ; CHECK-HARDFP-DP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-DP-NEXT: movge r0, #1 @@ -629,13 +629,13 @@ define i32 @dcmpgt(double %a, double %b) #0 { ; CHECK-SOFTFP: vmov d16, r2, r3 ; CHECK-SOFTFP-NEXT: vmov d17, r0, r1 ; CHECK-SOFTFP-NEXT: mov r0, #0 -; CHECK-SOFTFP-NEXT: vcmpe.f64 d17, d16 +; CHECK-SOFTFP-NEXT: vcmp.f64 d17, d16 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movgt r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-DP-LABEL: dcmpgt: -; CHECK-HARDFP-DP: vcmpe.f64 d0, d1 +; CHECK-HARDFP-DP: vcmp.f64 d0, d1 ; CHECK-HARDFP-DP-NEXT: mov r0, #0 ; 
CHECK-HARDFP-DP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-DP-NEXT: movgt r0, #1 @@ -671,13 +671,13 @@ define i32 @dcmpun(double %a, double %b) #0 { ; CHECK-SOFTFP: vmov d16, r2, r3 ; CHECK-SOFTFP-NEXT: vmov d17, r0, r1 ; CHECK-SOFTFP-NEXT: mov r0, #0 -; CHECK-SOFTFP-NEXT: vcmpe.f64 d17, d16 +; CHECK-SOFTFP-NEXT: vcmp.f64 d17, d16 ; CHECK-SOFTFP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-NEXT: movvs r0, #1 ; CHECK-SOFTFP-NEXT: mov pc, lr ; ; CHECK-HARDFP-DP-LABEL: dcmpun: -; CHECK-HARDFP-DP: vcmpe.f64 d0, d1 +; CHECK-HARDFP-DP: vcmp.f64 d0, d1 ; CHECK-HARDFP-DP-NEXT: mov r0, #0 ; CHECK-HARDFP-DP-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-DP-NEXT: movvs r0, #1 diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll index a8fc532070e0d..260dd12b3e234 100644 --- a/llvm/test/CodeGen/ARM/fp16-instructions.ll +++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll @@ -164,9 +164,9 @@ entry: ; CHECK-LABEL: VCMPE1: ; CHECK-SOFT: bl __aeabi_fcmplt -; CHECK-SOFTFP-FP16: vcmpe.f32 s0, #0 -; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s0, #0 -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, #0 +; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0 +; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0 } define i32 @VCMPE2(float %F.coerce, float %G.coerce) { @@ -184,9 +184,9 @@ entry: ; CHECK-LABEL: VCMPE2: ; CHECK-SOFT: bl __aeabi_fcmplt -; CHECK-SOFTFP-FP16: vcmpe.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} +; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}} +; CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}} } ; Test lowering of BR_CC @@ -212,10 +212,10 @@ for.end: ; CHECK-SOFT: cmp r0, #{{0|1}} ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]] -; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0 +; CHECK-SOFTFP-FP16: vcmp.f32 [[S2]], s0 ; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr -; CHECK-SOFTFP-FULLFP16: vcmpe.f16 s{{.}}, s{{.}} +; 
CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}} ; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr } @@ -727,15 +727,15 @@ define half @select_cc_ge1(half* %a0) { ; CHECK-LABEL: select_cc_ge1: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it ge ; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}} @@ -749,15 +749,15 @@ define half @select_cc_ge2(half* %a0) { ; CHECK-LABEL: select_cc_ge2: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it ls ; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}} @@ -771,15 +771,15 @@ define half @select_cc_ge3(half* %a0) { ; CHECK-LABEL: select_cc_ge3: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}} 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it hi ; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}} @@ -793,15 +793,15 @@ define half @select_cc_ge4(half* %a0) { ; CHECK-LABEL: select_cc_ge4: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it lt ; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}} @@ -816,15 +816,15 @@ define half @select_cc_gt1(half* %a0) { ; CHECK-LABEL: select_cc_gt1: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it gt ; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}} @@ -838,15 +838,15 @@ define half @select_cc_gt2(half* %a0) { ; CHECK-LABEL: select_cc_gt2: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; 
CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it pl ; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}} @@ -860,15 +860,15 @@ define half @select_cc_gt3(half* %a0) { ; CHECK-LABEL: select_cc_gt3: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it le ; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}} @@ -882,15 +882,15 @@ define half @select_cc_gt4(half* %a0) { ; CHECK-LABEL: select_cc_gt4: -; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6 +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}} -; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0 +; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-SOFTFP-FP16-T32-NEXT: it mi ; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}} diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index f382144cf95fc..183653036f334 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ 
b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -202,7 +202,7 @@ define i1 @test_fcmp_ueq(half* %p, half* %q) #0 { ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-VFP: vcmpe.f32 +; CHECK-VFP: vcmp.f32 ; CHECK-NOVFP: bl __aeabi_fcmplt ; CHECK-FP16: vmrs APSR_nzcv, fpscr ; CHECK-VFP: strmi diff --git a/llvm/test/CodeGen/ARM/fpcmp.ll b/llvm/test/CodeGen/ARM/fpcmp.ll index 67326e0001697..b8fc21f8146e9 100644 --- a/llvm/test/CodeGen/ARM/fpcmp.ll +++ b/llvm/test/CodeGen/ARM/fpcmp.ll @@ -2,7 +2,7 @@ define i32 @f1(float %a) { ;CHECK-LABEL: f1: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: movmi entry: %tmp = fcmp olt float %a, 1.000000e+00 ; [#uses=1] @@ -22,7 +22,7 @@ entry: define i32 @f3(float %a) { ;CHECK-LABEL: f3: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: movgt entry: %tmp = fcmp ogt float %a, 1.000000e+00 ; [#uses=1] @@ -32,7 +32,7 @@ entry: define i32 @f4(float %a) { ;CHECK-LABEL: f4: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: movge entry: %tmp = fcmp oge float %a, 1.000000e+00 ; [#uses=1] @@ -42,7 +42,7 @@ entry: define i32 @f5(float %a) { ;CHECK-LABEL: f5: -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 ;CHECK: movls entry: %tmp = fcmp ole float %a, 1.000000e+00 ; [#uses=1] @@ -62,7 +62,7 @@ entry: define i32 @g1(double %a) { ;CHECK-LABEL: g1: -;CHECK: vcmpe.f64 +;CHECK: vcmp.f64 ;CHECK: movmi entry: %tmp = fcmp olt double %a, 1.000000e+00 ; [#uses=1] diff --git a/llvm/test/CodeGen/ARM/ifcvt11.ll b/llvm/test/CodeGen/ARM/ifcvt11.ll index eae41e21c610e..7d577065a6d2d 100644 --- a/llvm/test/CodeGen/ARM/ifcvt11.ll +++ b/llvm/test/CodeGen/ARM/ifcvt11.ll @@ -17,7 +17,7 @@ bb.nph: ; preds = %entry br label %bb bb: ; preds = %bb4, %bb.nph -; CHECK: vcmpe.f64 +; CHECK: vcmp.f64 ; CHECK: vmrs APSR_nzcv, fpscr %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ] %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ] @@ -30,9 +30,9 @@ bb: ; preds = %bb4, %bb.nph bb1: ; preds = %bb ; CHECK-NOT: it -; CHECK-NOT: vcmpemi +; CHECK-NOT: vcmpmi ; 
CHECK-NOT: vmrsmi -; CHECK: vcmpe.f64 +; CHECK: vcmp.f64 ; CHECK: vmrs APSR_nzcv, fpscr %scevgep12 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 2 %6 = load double, double* %scevgep12, align 4 diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll index 6424754a982fa..d96bc0249b42f 100644 --- a/llvm/test/CodeGen/ARM/swifterror.ll +++ b/llvm/test/CodeGen/ARM/swifterror.ll @@ -194,7 +194,7 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float ; CHECK-O0: strb [[ID2]], [{{.*}}[[ID]], #8] ; spill r0 ; CHECK-O0: str r0, [sp{{.*}}] -; CHECK-O0: vcmpe +; CHECK-O0: vcmp ; CHECK-O0: ble ; reload from stack ; CHECK-O0: ldr r8 diff --git a/llvm/test/CodeGen/ARM/vcmp-crash.ll b/llvm/test/CodeGen/ARM/vcmp-crash.ll deleted file mode 100644 index 2d3262be5849b..0000000000000 --- a/llvm/test/CodeGen/ARM/vcmp-crash.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: llc -mcpu=cortex-m4 < %s | FileCheck %s - -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv7em-none--eabi" - -; CHECK: vcmp.f32 -define double @f(double %a, double %b, double %c, float %d) { - %1 = fcmp oeq float %d, 0.0 - %2 = select i1 %1, double %a, double %c - ret double %2 -} diff --git a/llvm/test/CodeGen/ARM/vfp.ll b/llvm/test/CodeGen/ARM/vfp.ll index 8fa5113d8a31f..c18855abd877f 100644 --- a/llvm/test/CodeGen/ARM/vfp.ll +++ b/llvm/test/CodeGen/ARM/vfp.ll @@ -142,7 +142,7 @@ define void @test_cmpfp0(float* %glob, i32 %X) { ;CHECK-LABEL: test_cmpfp0: entry: %tmp = load float, float* %glob ; [#uses=1] -;CHECK: vcmpe.f32 +;CHECK: vcmp.f32 %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; [#uses=1] br i1 %tmp.upgrd.3, label %cond_true, label %cond_false diff --git a/llvm/test/CodeGen/ARM/vsel-fp16.ll b/llvm/test/CodeGen/ARM/vsel-fp16.ll index 9ccc6f4272830..fda1fcb5f87cd 100644 --- a/llvm/test/CodeGen/ARM/vsel-fp16.ll +++ b/llvm/test/CodeGen/ARM/vsel-fp16.ll @@ -106,7 +106,7 @@ define void 
@test_vsel32ogt(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s0, s2 @@ -130,7 +130,7 @@ define void @test_vsel32oge(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -178,7 +178,7 @@ define void @test_vsel32ugt(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s2, s0 @@ -202,7 +202,7 @@ define void @test_vsel32uge(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s2, s0 @@ -226,7 +226,7 @@ define void @test_vsel32olt(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s0, s2 @@ -250,7 +250,7 @@ define void @test_vsel32ult(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: 
vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s2, s0 @@ -274,7 +274,7 @@ define void @test_vsel32ole(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -298,7 +298,7 @@ define void @test_vsel32ule(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s2, s0 @@ -322,7 +322,7 @@ define void @test_vsel32ord(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselvs.f16 s0, s2, s0 @@ -370,7 +370,7 @@ define void @test_vsel32uno(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, half* ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselvs.f16 s0, s0, s2 @@ -395,7 +395,7 @@ define void @test_vsel32ogt_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 
s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s0, s2 @@ -419,7 +419,7 @@ define void @test_vsel32oge_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -467,7 +467,7 @@ define void @test_vsel32ugt_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s0, s2 @@ -491,7 +491,7 @@ define void @test_vsel32uge_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -515,7 +515,7 @@ define void @test_vsel32olt_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 s0, s0, s2 @@ -539,7 +539,7 @@ define void @test_vsel32ult_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselgt.f16 
s0, s0, s2 @@ -563,7 +563,7 @@ define void @test_vsel32ole_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -587,7 +587,7 @@ define void @test_vsel32ule_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s6, s4 +; CHECK-NEXT: vcmp.f16 s6, s4 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselge.f16 s0, s0, s2 @@ -611,7 +611,7 @@ define void @test_vsel32ord_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselvs.f16 s0, s2, s0 @@ -659,7 +659,7 @@ define void @test_vsel32uno_nnan(half* %lhs_ptr, half* %rhs_ptr, half* %a_ptr, h ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vldr.16 s6, [r1] ; CHECK-NEXT: movw r0, :lower16:varhalf -; CHECK-NEXT: vcmpe.f16 s4, s6 +; CHECK-NEXT: vcmp.f16 s4, s6 ; CHECK-NEXT: movt r0, :upper16:varhalf ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vselvs.f16 s0, s0, s2 diff --git a/llvm/test/CodeGen/ARM/vsel.ll b/llvm/test/CodeGen/ARM/vsel.ll index 9408424e3a6d5..33d16ad45e242 100644 --- a/llvm/test/CodeGen/ARM/vsel.ll +++ b/llvm/test/CodeGen/ARM/vsel.ll @@ -96,7 +96,7 @@ define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ogt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f32 
s0, s2, s3 ret void } @@ -105,7 +105,7 @@ define void @test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ogt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -114,7 +114,7 @@ define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp oge float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -123,7 +123,7 @@ define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp oge float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -150,7 +150,7 @@ define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ugt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f32 s0, s3, s2 ret void } @@ -159,7 +159,7 @@ define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ugt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f64 d16, d2, d1 ret void } @@ -168,7 +168,7 @@ define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp uge float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f32 s0, s3, s2 ret void } @@ -177,7 +177,7 @@ define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, 
double %b) { %tst1 = fcmp uge float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f64 d16, d2, d1 ret void } @@ -186,7 +186,7 @@ define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp olt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f32 s0, s2, s3 ret void } @@ -195,7 +195,7 @@ define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp olt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -204,7 +204,7 @@ define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ult float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f32 s0, s3, s2 ret void } @@ -213,7 +213,7 @@ define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ult float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f64 d16, d2, d1 ret void } @@ -222,7 +222,7 @@ define void @test_vsel32ole(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ole float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -231,7 +231,7 @@ define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ole float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double 
%val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -240,7 +240,7 @@ define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ule float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f32 s0, s3, s2 ret void } @@ -249,7 +249,7 @@ define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ule float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f64 d16, d2, d1 ret void } @@ -258,7 +258,7 @@ define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp ord float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f32 s0, s3, s2 ret void } @@ -267,7 +267,7 @@ define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp ord float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f64 d16, d2, d1 ret void } @@ -294,7 +294,7 @@ define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) { %tst1 = fcmp uno float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f32 s0, s2, s3 ret void } @@ -303,7 +303,7 @@ define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) { %tst1 = fcmp uno float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f64 d16, d1, 
d2 ret void } @@ -313,7 +313,7 @@ define void @test_vsel32ogt_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan ogt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f32 s0, s2, s3 ret void } @@ -322,7 +322,7 @@ define void @test_vsel64ogt_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ogt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -331,7 +331,7 @@ define void @test_vsel32oge_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan oge float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -340,7 +340,7 @@ define void @test_vsel64oge_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan oge float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -367,7 +367,7 @@ define void @test_vsel32ugt_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan ugt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f32 s0, s2, s3 ret void } @@ -376,7 +376,7 @@ define void @test_vsel64ugt_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ugt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -385,7 +385,7 @@ define void 
@test_vsel32uge_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan uge float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -394,7 +394,7 @@ define void @test_vsel64uge_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan uge float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -403,7 +403,7 @@ define void @test_vsel32olt_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan olt float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f32 s0, s2, s3 ret void } @@ -412,7 +412,7 @@ define void @test_vsel64olt_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan olt float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -421,7 +421,7 @@ define void @test_vsel32ult_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan ult float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f32 s0, s2, s3 ret void } @@ -430,7 +430,7 @@ define void @test_vsel64ult_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ult float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselgt.f64 d16, d1, d2 ret void } @@ -439,7 +439,7 @@ define void @test_vsel32ole_nnan(float %lhs32, float %rhs32, float %a, float 
%b) %tst1 = fcmp nnan ole float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -448,7 +448,7 @@ define void @test_vsel64ole_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ole float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -457,7 +457,7 @@ define void @test_vsel32ule_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan ule float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f32 s0, s2, s3 ret void } @@ -466,7 +466,7 @@ define void @test_vsel64ule_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ule float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s1, s0 +; CHECK: vcmp.f32 s1, s0 ; CHECK: vselge.f64 d16, d1, d2 ret void } @@ -475,7 +475,7 @@ define void @test_vsel32ord_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan ord float %lhs32, %rhs32 %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f32 s0, s3, s2 ret void } @@ -484,7 +484,7 @@ define void @test_vsel64ord_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan ord float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f64 d16, d2, d1 ret void } @@ -511,7 +511,7 @@ define void @test_vsel32uno_nnan(float %lhs32, float %rhs32, float %a, float %b) %tst1 = fcmp nnan uno float %lhs32, %rhs32 %val1 = select i1 
%tst1, float %a, float %b store float %val1, float* @varfloat -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f32 s0, s2, s3 ret void } @@ -520,7 +520,7 @@ define void @test_vsel64uno_nnan(float %lhs32, float %rhs32, double %a, double % %tst1 = fcmp nnan uno float %lhs32, %rhs32 %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble -; CHECK: vcmpe.f32 s0, s1 +; CHECK: vcmp.f32 s0, s1 ; CHECK: vselvs.f64 d16, d1, d2 ret void } diff --git a/llvm/test/CodeGen/Thumb2/float-cmp.ll b/llvm/test/CodeGen/Thumb2/float-cmp.ll index 87d6ad36531d3..ca9326ad66a26 100644 --- a/llvm/test/CodeGen/Thumb2/float-cmp.ll +++ b/llvm/test/CodeGen/Thumb2/float-cmp.ll @@ -23,7 +23,7 @@ define i1 @cmp_f_oeq(float %a, float %b) { define i1 @cmp_f_ogt(float %a, float %b) { ; CHECK-LABEL: cmp_f_ogt: ; NONE: bl __aeabi_fcmpgt -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movgt r0, #1 %1 = fcmp ogt float %a, %b ret i1 %1 @@ -31,7 +31,7 @@ define i1 @cmp_f_ogt(float %a, float %b) { define i1 @cmp_f_oge(float %a, float %b) { ; CHECK-LABEL: cmp_f_oge: ; NONE: bl __aeabi_fcmpge -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movge r0, #1 %1 = fcmp oge float %a, %b ret i1 %1 @@ -39,7 +39,7 @@ define i1 @cmp_f_oge(float %a, float %b) { define i1 @cmp_f_olt(float %a, float %b) { ; CHECK-LABEL: cmp_f_olt: ; NONE: bl __aeabi_fcmplt -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movmi r0, #1 %1 = fcmp olt float %a, %b ret i1 %1 @@ -47,7 +47,7 @@ define i1 @cmp_f_olt(float %a, float %b) { define i1 @cmp_f_ole(float %a, float %b) { ; CHECK-LABEL: cmp_f_ole: ; NONE: bl __aeabi_fcmple -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movls r0, #1 %1 = fcmp ole float %a, %b ret i1 %1 @@ -65,7 +65,7 @@ define i1 @cmp_f_one(float %a, float %b) { define i1 @cmp_f_ord(float %a, float %b) { ; CHECK-LABEL: cmp_f_ord: ; NONE: bl __aeabi_fcmpun -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movvc r0, #1 %1 = fcmp ord float %a, %b ret i1 %1 @@ -85,7 +85,7 @@ define i1 
@cmp_f_ugt(float %a, float %b) { ; NONE: bl __aeabi_fcmple ; NONE-NEXT: clz r0, r0 ; NONE-NEXT: lsrs r0, r0, #5 -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movhi r0, #1 %1 = fcmp ugt float %a, %b ret i1 %1 @@ -95,7 +95,7 @@ define i1 @cmp_f_uge(float %a, float %b) { ; NONE: bl __aeabi_fcmplt ; NONE-NEXT: clz r0, r0 ; NONE-NEXT: lsrs r0, r0, #5 -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movpl r0, #1 %1 = fcmp uge float %a, %b ret i1 %1 @@ -105,7 +105,7 @@ define i1 @cmp_f_ult(float %a, float %b) { ; NONE: bl __aeabi_fcmpge ; NONE-NEXT: clz r0, r0 ; NONE-NEXT: lsrs r0, r0, #5 -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movlt r0, #1 %1 = fcmp ult float %a, %b ret i1 %1 @@ -115,7 +115,7 @@ define i1 @cmp_f_ule(float %a, float %b) { ; NONE: bl __aeabi_fcmpgt ; NONE-NEXT: clz r0, r0 ; NONE-NEXT: lsrs r0, r0, #5 -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movle r0, #1 %1 = fcmp ule float %a, %b ret i1 %1 @@ -131,7 +131,7 @@ define i1 @cmp_f_une(float %a, float %b) { define i1 @cmp_f_uno(float %a, float %b) { ; CHECK-LABEL: cmp_f_uno: ; NONE: bl __aeabi_fcmpun -; HARD: vcmpe.f32 +; HARD: vcmp.f32 ; HARD: movvs r0, #1 %1 = fcmp uno float %a, %b ret i1 %1 @@ -164,7 +164,7 @@ define i1 @cmp_d_ogt(double %a, double %b) { ; CHECK-LABEL: cmp_d_ogt: ; NONE: bl __aeabi_dcmpgt ; SP: bl __aeabi_dcmpgt -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movgt r0, #1 %1 = fcmp ogt double %a, %b ret i1 %1 @@ -173,7 +173,7 @@ define i1 @cmp_d_oge(double %a, double %b) { ; CHECK-LABEL: cmp_d_oge: ; NONE: bl __aeabi_dcmpge ; SP: bl __aeabi_dcmpge -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movge r0, #1 %1 = fcmp oge double %a, %b ret i1 %1 @@ -182,7 +182,7 @@ define i1 @cmp_d_olt(double %a, double %b) { ; CHECK-LABEL: cmp_d_olt: ; NONE: bl __aeabi_dcmplt ; SP: bl __aeabi_dcmplt -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movmi r0, #1 %1 = fcmp olt double %a, %b ret i1 %1 @@ -191,7 +191,7 @@ define i1 @cmp_d_ole(double %a, double %b) { ; CHECK-LABEL: cmp_d_ole: ; NONE: bl __aeabi_dcmple ; SP: bl 
__aeabi_dcmple -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movls r0, #1 %1 = fcmp ole double %a, %b ret i1 %1 @@ -212,7 +212,7 @@ define i1 @cmp_d_ord(double %a, double %b) { ; CHECK-LABEL: cmp_d_ord: ; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmpun -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movvc r0, #1 %1 = fcmp ord double %a, %b ret i1 %1 @@ -221,7 +221,7 @@ define i1 @cmp_d_ugt(double %a, double %b) { ; CHECK-LABEL: cmp_d_ugt: ; NONE: bl __aeabi_dcmple ; SP: bl __aeabi_dcmple -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movhi r0, #1 %1 = fcmp ugt double %a, %b ret i1 %1 @@ -231,7 +231,7 @@ define i1 @cmp_d_ult(double %a, double %b) { ; CHECK-LABEL: cmp_d_ult: ; NONE: bl __aeabi_dcmpge ; SP: bl __aeabi_dcmpge -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movlt r0, #1 %1 = fcmp ult double %a, %b ret i1 %1 @@ -242,7 +242,7 @@ define i1 @cmp_d_uno(double %a, double %b) { ; CHECK-LABEL: cmp_d_uno: ; NONE: bl __aeabi_dcmpun ; SP: bl __aeabi_dcmpun -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movvs r0, #1 %1 = fcmp uno double %a, %b ret i1 %1 @@ -271,7 +271,7 @@ define i1 @cmp_d_uge(double %a, double %b) { ; CHECK-LABEL: cmp_d_uge: ; NONE: bl __aeabi_dcmplt ; SP: bl __aeabi_dcmplt -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movpl r0, #1 %1 = fcmp uge double %a, %b ret i1 %1 @@ -281,7 +281,7 @@ define i1 @cmp_d_ule(double %a, double %b) { ; CHECK-LABEL: cmp_d_ule: ; NONE: bl __aeabi_dcmpgt ; SP: bl __aeabi_dcmpgt -; DP: vcmpe.f64 +; DP: vcmp.f64 ; DP: movle r0, #1 %1 = fcmp ule double %a, %b ret i1 %1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll index 0786849dae20e..9e793caac3dd6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -121,24 +121,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 
; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -173,24 +173,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -225,24 +225,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: 
vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -277,24 +277,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -444,24 +444,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: 
; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -497,24 +497,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -550,24 +550,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 
x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -603,24 +603,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -656,24 +656,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x 
float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -710,24 +710,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s5 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -1035,13 +1035,13 @@ define 
arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1054,7 +1054,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movgt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1074,7 +1074,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1086,7 +1086,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1101,7 +1101,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: 
vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1113,7 +1113,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -1122,7 +1122,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -1159,13 +1159,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1178,7 +1178,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movge r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1198,7 +1198,7 @@ define arm_aapcs_vfpcc <8 x half> 
@vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1210,7 +1210,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1225,7 +1225,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1237,7 +1237,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -1246,7 +1246,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -1283,13 +1283,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x 
half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1302,7 +1302,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movmi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1322,7 +1322,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1334,7 +1334,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1349,7 +1349,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: 
vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1361,7 +1361,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -1370,7 +1370,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -1407,13 +1407,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1426,7 +1426,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movls r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1446,7 +1446,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, 
s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1458,7 +1458,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1473,7 +1473,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1485,7 +1485,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -1494,7 +1494,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -1796,13 +1796,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 
s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1815,7 +1815,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movhi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1835,7 +1835,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1847,7 +1847,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1862,7 +1862,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: 
mov.w r1, #0 @@ -1874,7 +1874,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -1883,7 +1883,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -1921,13 +1921,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -1940,7 +1940,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movpl r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -1960,7 +1960,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 
-; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1972,7 +1972,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -1987,7 +1987,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1999,7 +1999,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -2008,7 +2008,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -2046,13 +2046,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 
s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -2065,7 +2065,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movlt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -2085,7 +2085,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2097,7 +2097,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -2112,7 +2112,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2124,7 +2124,7 @@ define arm_aapcs_vfpcc <8 x half> 
@vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -2133,7 +2133,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -2171,13 +2171,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -2190,7 +2190,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movle r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -2210,7 +2210,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 
s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2222,7 +2222,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -2237,7 +2237,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2249,7 +2249,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -2258,7 +2258,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -2296,13 +2296,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs 
r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -2315,7 +2315,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movvc r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -2335,7 +2335,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2347,7 +2347,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -2362,7 +2362,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2374,7 +2374,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 
s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -2383,7 +2383,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 @@ -2422,13 +2422,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmovx.f16 s18, s12 @@ -2441,7 +2441,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movvs r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vcmpe.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: lsls r2, r2, #31 ; CHECK-MVE-NEXT: vmovx.f16 s22, s1 ; CHECK-MVE-NEXT: vseleq.f16 s16, s12, s8 @@ -2461,7 +2461,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s13, s9 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s5 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2473,7 +2473,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s13 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[3], r1 @@ -2488,7 +2488,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vseleq.f16 s20, s14, s10 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmovx.f16 s20, s6 -; CHECK-MVE-NEXT: vcmpe.f16 s22, s20 +; CHECK-MVE-NEXT: vcmp.f16 s22, s20 ; CHECK-MVE-NEXT: vmov.16 q4[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2500,7 +2500,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: vmovx.f16 s22, s14 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmov r1, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q4[5], r1 @@ -2509,7 +2509,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %s ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 ; CHECK-MVE-NEXT: cset r1, ne -; CHECK-MVE-NEXT: vcmpe.f16 s0, s4 +; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: lsls r1, r1, #31 ; CHECK-MVE-NEXT: vmovx.f16 s0, s11 ; CHECK-MVE-NEXT: vseleq.f16 s20, s15, s11 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll index 608689dc46579..66d90c892e524 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -128,24 +128,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: 
vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -183,24 +183,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -238,24 +238,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> 
%a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -294,24 +294,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -472,24 +472,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x 
float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -529,24 +529,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -586,24 +586,24 @@ entry: define 
arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -642,24 +642,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r3, #1 ; CHECK-MVE-NEXT: cmp r3, 
#0 @@ -698,24 +698,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -756,24 +756,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s4 +; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: 
movvs r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -1092,13 +1092,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -1111,7 +1111,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1128,7 +1128,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1138,7 +1138,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -1153,7 +1153,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; 
CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1163,11 +1163,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it gt @@ -1218,13 +1218,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -1237,7 +1237,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1254,7 +1254,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: 
vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1264,7 +1264,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -1279,7 +1279,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1289,11 +1289,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ge @@ -1344,13 +1344,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; 
CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -1363,7 +1363,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1380,7 +1380,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1390,7 +1390,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -1405,7 +1405,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1415,11 +1415,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 
s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it mi @@ -1471,13 +1471,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -1490,7 +1490,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1507,7 +1507,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1517,7 +1517,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-MVE-NEXT: vmov r0, s18 @@ -1532,7 +1532,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1542,11 +1542,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it ls @@ -1868,13 +1868,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -1887,7 +1887,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1904,7 +1904,7 @@ 
define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1914,7 +1914,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -1929,7 +1929,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -1939,11 +1939,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it hi @@ -1996,13 +1996,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; 
CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -2015,7 +2015,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2032,7 +2032,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2042,7 +2042,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -2057,7 +2057,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2067,11 +2067,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p, ; 
CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it pl @@ -2124,13 +2124,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -2143,7 +2143,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2160,7 +2160,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2170,7 +2170,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 
s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -2185,7 +2185,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2195,11 +2195,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it lt @@ -2251,13 +2251,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -2270,7 +2270,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; 
CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2287,7 +2287,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2297,7 +2297,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -2312,7 +2312,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2322,11 +2322,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it le @@ -2378,13 +2378,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x 
half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -2397,7 +2397,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2414,7 +2414,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2424,7 +2424,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -2439,7 +2439,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; 
CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2449,11 +2449,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vc @@ -2507,13 +2507,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r0, #0 ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s16 +; CHECK-MVE-NEXT: vcmp.f16 s12, s16 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #1 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: movs r2, #0 ; CHECK-MVE-NEXT: lsls r0, r0, #31 @@ -2526,7 +2526,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r0, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s16 +; CHECK-MVE-NEXT: vcmp.f16 s1, s16 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2543,7 +2543,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r0 ; CHECK-MVE-NEXT: 
vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2553,7 +2553,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s5 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s16 +; CHECK-MVE-NEXT: vcmp.f16 s2, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 @@ -2568,7 +2568,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6 ; CHECK-MVE-NEXT: vmov r0, s18 ; CHECK-MVE-NEXT: vmovx.f16 s18, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s18, s16 +; CHECK-MVE-NEXT: vcmp.f16 s18, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r0, #0 @@ -2578,11 +2578,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, half* %src2p, ; CHECK-MVE-NEXT: cset r0, ne ; CHECK-MVE-NEXT: vmovx.f16 s18, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s16 +; CHECK-MVE-NEXT: vcmp.f16 s3, s16 ; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r0, s18 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s16 +; CHECK-MVE-NEXT: vcmp.f16 s0, s16 ; CHECK-MVE-NEXT: vmov.16 q3[5], r0 ; CHECK-MVE-NEXT: mov.w r0, #0 ; CHECK-MVE-NEXT: it vs diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 126e00a31a8d8..6aae7e7665a10 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -122,24 +122,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; 
CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -174,24 +174,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -226,24 +226,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -278,24 +278,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -446,24 +446,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -499,24 +499,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -552,24 +552,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -605,24 +605,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, #0 +; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -658,24 +658,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 ; CHECK-MVE-NEXT: 
movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s1 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s2 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s3 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -713,24 +713,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmpe.f32 s0, s0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 ; CHECK-MVE-NEXT: movs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s1, s1 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r2, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s2, s2 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #1 ; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r3, #0 -; CHECK-MVE-NEXT: vcmpe.f32 s3, s3 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r3, #1 ; CHECK-MVE-NEXT: cmp r3, #0 @@ -1032,13 +1032,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, 
#0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it gt ; CHECK-MVE-NEXT: movgt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1051,7 +1051,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1069,7 +1069,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1079,7 +1079,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1094,7 +1094,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1104,11 +1104,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ogt_v8f16(<8 x half> %src, <8 x half> %a ; 
CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it gt @@ -1152,13 +1152,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ge ; CHECK-MVE-NEXT: movge r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1171,7 +1171,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1189,7 +1189,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1199,7 +1199,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; 
CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1214,7 +1214,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1224,11 +1224,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_oge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ge @@ -1272,13 +1272,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it mi ; CHECK-MVE-NEXT: movmi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1291,7 +1291,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 
s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1309,7 +1309,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1319,7 +1319,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1334,7 +1334,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1344,11 +1344,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_olt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it mi @@ -1392,13 +1392,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; 
CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it ls ; CHECK-MVE-NEXT: movls r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1411,7 +1411,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1429,7 +1429,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1439,7 +1439,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1454,7 +1454,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1464,11 +1464,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ole_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it ls @@ -1770,13 +1770,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it hi ; CHECK-MVE-NEXT: movhi r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1789,7 +1789,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1807,7 +1807,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w 
r1, #0 @@ -1817,7 +1817,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1832,7 +1832,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1842,11 +1842,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it hi @@ -1891,13 +1891,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it pl ; CHECK-MVE-NEXT: movpl r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -1910,7 +1910,7 @@ define arm_aapcs_vfpcc <8 x 
half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -1928,7 +1928,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1938,7 +1938,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -1953,7 +1953,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -1963,11 +1963,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], 
r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it pl @@ -2012,13 +2012,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it lt ; CHECK-MVE-NEXT: movlt r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -2031,7 +2031,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2049,7 +2049,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2059,7 +2059,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -2074,7 +2074,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov 
r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2084,11 +2084,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it lt @@ -2133,13 +2133,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, #0 +; CHECK-MVE-NEXT: vcmp.f16 s12, #0 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it le ; CHECK-MVE-NEXT: movle r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -2152,7 +2152,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, #0 +; CHECK-MVE-NEXT: vcmp.f16 s1, #0 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2170,7 +2170,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: 
vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2180,7 +2180,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, #0 +; CHECK-MVE-NEXT: vcmp.f16 s2, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -2195,7 +2195,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, #0 +; CHECK-MVE-NEXT: vcmp.f16 s16, #0 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2205,11 +2205,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, #0 +; CHECK-MVE-NEXT: vcmp.f16 s3, #0 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, #0 +; CHECK-MVE-NEXT: vcmp.f16 s0, #0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it le @@ -2254,13 +2254,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s12 +; CHECK-MVE-NEXT: vcmp.f16 s12, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vc ; CHECK-MVE-NEXT: movvc r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s0 +; CHECK-MVE-NEXT: vcmp.f16 
s0, s0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -2273,7 +2273,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s1 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2291,7 +2291,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, s16 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2301,7 +2301,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s2 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 @@ -2316,7 +2316,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, s16 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2326,11 +2326,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s3 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vc @@ -2377,13 +2377,13 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 ; CHECK-MVE-NEXT: movs r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s12, s12 +; CHECK-MVE-NEXT: vcmp.f16 s12, s12 ; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #1 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: lsls r1, r1, #31 @@ -2396,7 +2396,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r2, ne ; CHECK-MVE-NEXT: vmov r1, s12 ; CHECK-MVE-NEXT: lsls r2, r2, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s1, s1 +; CHECK-MVE-NEXT: vcmp.f16 s1, s1 ; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r2, s12 @@ -2414,7 +2414,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s9, s5 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s1 -; CHECK-MVE-NEXT: vcmpe.f16 s16, s16 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 ; CHECK-MVE-NEXT: vmov.16 q3[2], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2424,7 +2424,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s5 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s2, s2 +; CHECK-MVE-NEXT: vcmp.f16 s2, s2 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov 
r1, s16 @@ -2439,7 +2439,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: vseleq.f16 s16, s10, s6 ; CHECK-MVE-NEXT: vmov r1, s16 ; CHECK-MVE-NEXT: vmovx.f16 s16, s2 -; CHECK-MVE-NEXT: vcmpe.f16 s16, s16 +; CHECK-MVE-NEXT: vcmp.f16 s16, s16 ; CHECK-MVE-NEXT: vmov.16 q3[4], r1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: mov.w r1, #0 @@ -2449,11 +2449,11 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uno_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmovx.f16 s16, s6 ; CHECK-MVE-NEXT: lsls r1, r1, #31 -; CHECK-MVE-NEXT: vcmpe.f16 s3, s3 +; CHECK-MVE-NEXT: vcmp.f16 s3, s3 ; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s16 -; CHECK-MVE-NEXT: vcmpe.f16 s0, s0 +; CHECK-MVE-NEXT: vcmp.f16 s0, s0 ; CHECK-MVE-NEXT: vmov.16 q3[5], r1 ; CHECK-MVE-NEXT: mov.w r1, #0 ; CHECK-MVE-NEXT: it vs From a70c5261436322a53187d67b8bdc0445d0463a9a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 8 Oct 2019 08:46:38 +0000 Subject: [PATCH 219/254] [LoopRotate] Unconditionally get ScalarEvolution. Summary: LoopRotate is a loop pass and SE should always be available. Reviewers: anemet, asbirlea Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D68573 llvm-svn: 374026 --- llvm/lib/Transforms/Scalar/LoopRotation.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 95e2316c20d84..3585b71748a68 100644 --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -96,15 +96,14 @@ class LoopRotateLegacyPass : public LoopPass { auto *AC = &getAnalysis().getAssumptionCache(F); auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *SEWP = getAnalysisIfAvailable(); - auto *SE = SEWP ? 
&SEWP->getSE() : nullptr; + auto &SE = getAnalysis().getSE(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional MSSAU; if (EnableMSSALoopDependency) { MemorySSA *MSSA = &getAnalysis().getMSSA(); MSSAU = MemorySSAUpdater(MSSA); } - return LoopRotation(L, LI, TTI, AC, DT, SE, + return LoopRotation(L, LI, TTI, AC, DT, &SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false, MaxHeaderSize, false); } From 2edc69c05d1e687c9cd12c5409d0da116d1e9521 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Tue, 8 Oct 2019 08:49:15 +0000 Subject: [PATCH 220/254] [NFC] Add REQUIRES for r374017 in testcase llvm-svn: 374027 --- llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll index 7c48d6400eb7b..55593c306f7a1 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8 ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9 +; REQUIRES: asserts @a = global [1024 x i8] zeroinitializer, align 16 @b = global [1024 x i8] zeroinitializer, align 16 From eec98969603e3d79c73ed8955bcaa581cd5b455a Mon Sep 17 00:00:00 2001 From: George Rimar Date: Tue, 8 Oct 2019 08:59:12 +0000 Subject: [PATCH 221/254] [llvm-readobj/llvm-readelf] - Add checks for GNU-style to "all.test" test case. We do not check the GNU-style output when -all is given. This patch does that. 
Differential revision: https://reviews.llvm.org/D68462 llvm-svn: 374028 --- llvm/test/tools/llvm-readobj/all.test | 118 ++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 17 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/all.test b/llvm/test/tools/llvm-readobj/all.test index ac27f38c3a327..cc1aa29504a07 100644 --- a/llvm/test/tools/llvm-readobj/all.test +++ b/llvm/test/tools/llvm-readobj/all.test @@ -1,22 +1,40 @@ # RUN: yaml2obj %s -o %t.o -# RUN: llvm-readobj -a %t.o | FileCheck %s --check-prefix ALL -# RUN: llvm-readobj --all %t.o | FileCheck %s --check-prefix ALL +# RUN: llvm-readobj -a %t.o | FileCheck %s --check-prefix LLVM-ALL +# RUN: llvm-readobj --all %t.o | FileCheck %s --check-prefix LLVM-ALL -# ALL: Format: ELF32-i386 -# ALL: Arch: i386 -# ALL: AddressSize: 32bit -# ALL: LoadName: -# ALL: ElfHeader { -# ALL: Sections [ -# ALL: Relocations [ -# ALL: Symbols [ -# ALL: ProgramHeaders [ -# ALL: Version symbols { -# ALL: SHT_GNU_verdef { -# ALL: SHT_GNU_verneed { -# ALL: Addrsig [ -# ALL: Notes [ -# ALL: StackSizes [ +# LLVM-ALL: Format: ELF32-i386 +# LLVM-ALL: Arch: i386 +# LLVM-ALL: AddressSize: 32bit +# LLVM-ALL: LoadName: +# LLVM-ALL: ElfHeader { +# LLVM-ALL: Sections [ +# LLVM-ALL: Relocations [ +# LLVM-ALL: Symbols [ +# LLVM-ALL: ProgramHeaders [ +# LLVM-ALL: Version symbols { +# LLVM-ALL: SHT_GNU_verdef { +# LLVM-ALL: SHT_GNU_verneed { +# LLVM-ALL: Addrsig [ +# LLVM-ALL: Notes [ +# LLVM-ALL: StackSizes [ + +# RUN: llvm-readelf -a %t.o | FileCheck %s --check-prefix GNU-ALL +# RUN: llvm-readelf --all %t.o | FileCheck %s --check-prefix GNU-ALL + +# GNU-ALL: ELF Header: +# GNU-ALL: There are {{.*}} section headers, starting at offset {{.*}}: +# GNU-ALL: Relocation section '.rela.data' at offset {{.*}} contains {{.*}} entries: +# GNU-ALL: Symbol table '.symtab' contains {{.*}} entries: +# GNU-ALL: EH_FRAME Header [ +# GNU-ALL: .eh_frame section at offset {{.*}} address 0x0: +# GNU-ALL: Dynamic section at offset {{.*}} contains {{.*}} 
entries: +# GNU-ALL: Program Headers: +# GNU-ALL: Version symbols section '.gnu.version' contains {{.*}} entries: +# GNU-ALL: Version definition section '.gnu.version_d' contains {{.*}} entries: +# GNU-ALL: Version needs section '.gnu.version_r' contains {{.*}} entries: +# GNU-ALL: There are no section groups in this file. +# GNU-ALL: Histogram for bucket list length (total of 1 buckets) +# GNU-ALL: Displaying notes found at file offset {{.*}} with length {{.*}}: --- !ELF FileHeader: @@ -24,3 +42,69 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_386 +Sections: + - Name: .data + Type: SHT_PROGBITS + - Name: .rela.data + Type: SHT_REL + Relocations: + - Name: .gnu.version + Type: SHT_GNU_versym + Entries: [ 0 ] + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Info: 0x0 + Entries: [] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Info: 0x0 + Dependencies: + - Version: 1 + File: verneed1.so.0 + Entries: [] + - Name: .dynamic + Type: SHT_DYNAMIC + Address: 0x1000 + AddressAlign: 0x1000 + Entries: + - Tag: DT_HASH + Value: 0x1100 + - Tag: DT_NULL + Value: 0 + - Name: .hash + Type: SHT_HASH + Link: 0 + Bucket: [ 1 ] + Chain: [ 0, 0 ] + Address: 0x1100 + AddressAlign: 0x100 + - Name: .eh_frame_hdr + Type: SHT_PROGBITS +## An arbitrary linker-generated valid content. + Content: 011b033b140000000100000000f0ffff30000000 + - Name: .eh_frame + Type: SHT_PROGBITS + AddressAlign: 8 +## An arbitrary linker-generated valid content. + Content: 1400000000000000017a5200017810011b0c070890010000100000001c000000c8efffff0100000000000000 + - Name: .note.gnu.build-id + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x1500 +## An arbitrary linker-generated valid content. 
+ Content: 040000001000000003000000474E55004FCB712AA6387724A9F465A32CD8C14B +ProgramHeaders: + - Type: PT_LOAD + VAddr: 0x1000 + Sections: + - Section: .dynamic + - Section: .hash + - Type: PT_DYNAMIC + Sections: + - Section: .dynamic + - Type: PT_GNU_EH_FRAME + Sections: + - Section: .eh_frame_hdr + - Type: PT_NOTE + Sections: + - Section: .note.gnu.build-id From 07932b108898f64ad5429f9cd23d06baa9e914cb Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 8 Oct 2019 09:05:25 +0000 Subject: [PATCH 222/254] Fix a -Wpedantic warning namespace-closing '}' don't need ';'. llvm-svn: 374029 --- lldb/include/lldb/API/SBCommandReturnObject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/include/lldb/API/SBCommandReturnObject.h b/lldb/include/lldb/API/SBCommandReturnObject.h index d22c3ef411769..6aed32089ce95 100644 --- a/lldb/include/lldb/API/SBCommandReturnObject.h +++ b/lldb/include/lldb/API/SBCommandReturnObject.h @@ -17,7 +17,7 @@ namespace lldb_private { class SBCommandReturnObjectImpl; -}; +} namespace lldb { From c41294705bbb9457524df91c525d53ade53b304a Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 8 Oct 2019 09:05:31 +0000 Subject: [PATCH 223/254] Revert "[lldb-server/android] Show more processes and package name when necessary" This reverts r373758 because it causes several to test to be flaky (= failing ~90% of the time) on linux. 
llvm-svn: 374030 --- lldb/source/Host/linux/Host.cpp | 79 ++++++++++++++------------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/lldb/source/Host/linux/Host.cpp b/lldb/source/Host/linux/Host.cpp index c2e46e55add1a..f6a8766a71c56 100644 --- a/lldb/source/Host/linux/Host.cpp +++ b/lldb/source/Host/linux/Host.cpp @@ -144,79 +144,68 @@ static ArchSpec GetELFProcessCPUType(llvm::StringRef exe_path) { } } -static void GetProcessArgs(::pid_t pid, ProcessInstanceInfo &process_info) { - auto BufferOrError = getProcFile(pid, "cmdline"); - if (!BufferOrError) - return; - std::unique_ptr Cmdline = std::move(*BufferOrError); - - llvm::StringRef Arg0, Rest; - std::tie(Arg0, Rest) = Cmdline->getBuffer().split('\0'); - process_info.SetArg0(Arg0); - while (!Rest.empty()) { - llvm::StringRef Arg; - std::tie(Arg, Rest) = Rest.split('\0'); - process_info.GetArguments().AppendArgument(Arg); - } -} +static bool GetProcessAndStatInfo(::pid_t pid, + ProcessInstanceInfo &process_info, + ProcessState &State, ::pid_t &tracerpid) { + tracerpid = 0; + process_info.Clear(); -static void GetExePathAndArch(::pid_t pid, ProcessInstanceInfo &process_info) { Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - std::string ExePath(PATH_MAX, '\0'); // We can't use getProcFile here because proc/[pid]/exe is a symbolic link. llvm::SmallString<64> ProcExe; (llvm::Twine("/proc/") + llvm::Twine(pid) + "/exe").toVector(ProcExe); + std::string ExePath(PATH_MAX, '\0'); ssize_t len = readlink(ProcExe.c_str(), &ExePath[0], PATH_MAX); - if (len > 0) { - ExePath.resize(len); - } else { + if (len <= 0) { LLDB_LOG(log, "failed to read link exe link for {0}: {1}", pid, Status(errno, eErrorTypePOSIX)); - ExePath.resize(0); + return false; } + ExePath.resize(len); + // If the binary has been deleted, the link name has " (deleted)" appended. // Remove if there. 
llvm::StringRef PathRef = ExePath; PathRef.consume_back(" (deleted)"); - if (!PathRef.empty()) { - process_info.GetExecutableFile().SetFile(PathRef, FileSpec::Style::native); - process_info.SetArchitecture(GetELFProcessCPUType(PathRef)); - } -} + process_info.SetArchitecture(GetELFProcessCPUType(PathRef)); -static void GetProcessEnviron(::pid_t pid, ProcessInstanceInfo &process_info) { // Get the process environment. auto BufferOrError = getProcFile(pid, "environ"); if (!BufferOrError) - return; - + return false; std::unique_ptr Environ = std::move(*BufferOrError); + + // Get the command line used to start the process. + BufferOrError = getProcFile(pid, "cmdline"); + if (!BufferOrError) + return false; + std::unique_ptr Cmdline = std::move(*BufferOrError); + + // Get User and Group IDs and get tracer pid. + if (!GetStatusInfo(pid, process_info, State, tracerpid)) + return false; + + process_info.SetProcessID(pid); + process_info.GetExecutableFile().SetFile(PathRef, FileSpec::Style::native); + llvm::StringRef Rest = Environ->getBuffer(); while (!Rest.empty()) { llvm::StringRef Var; std::tie(Var, Rest) = Rest.split('\0'); process_info.GetEnvironment().insert(Var); } -} - -static bool GetProcessAndStatInfo(::pid_t pid, - ProcessInstanceInfo &process_info, - ProcessState &State, ::pid_t &tracerpid) { - tracerpid = 0; - process_info.Clear(); - process_info.SetProcessID(pid); - - GetExePathAndArch(pid, process_info); - GetProcessArgs(pid, process_info); - GetProcessEnviron(pid, process_info); - - // Get User and Group IDs and get tracer pid. 
- if (!GetStatusInfo(pid, process_info, State, tracerpid)) - return false; + llvm::StringRef Arg0; + std::tie(Arg0, Rest) = Cmdline->getBuffer().split('\0'); + process_info.SetArg0(Arg0); + while (!Rest.empty()) { + llvm::StringRef Arg; + std::tie(Arg, Rest) = Rest.split('\0'); + process_info.GetArguments().AppendArgument(Arg); + } return true; } From 4919534ae4d4029982d5a5ad7ea18f4a681cb602 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 8 Oct 2019 09:06:48 +0000 Subject: [PATCH 224/254] [llvm-exegesis] Finish plumbing the `Config` field. Summary: Right now there are no snippet generators that emit the `Config` Field, but I plan to add it to investigate LEA operands for PR32326. What was broken was: - `Config` Was not propagated up until the BenchmarkResult::Key. - Clustering should really consider different configs as measuring different things, so we should stabilize on (Opcode, Config) instead of just Opcode. Reviewers: gchatelet Subscribers: tschuett, llvm-commits, lebedev.ri Tags: #llvm Differential Revision: https://reviews.llvm.org/D68629 llvm-svn: 374031 --- ...analysis-cluster-stabilization-config.test | 3 +- llvm/tools/llvm-exegesis/lib/BenchmarkCode.h | 9 +--- .../tools/llvm-exegesis/lib/BenchmarkResult.h | 2 +- .../llvm-exegesis/lib/BenchmarkRunner.cpp | 22 ++++---- llvm/tools/llvm-exegesis/lib/Clustering.cpp | 52 +++++++++---------- llvm/tools/llvm-exegesis/lib/CodeTemplate.h | 2 + llvm/tools/llvm-exegesis/lib/SnippetFile.cpp | 4 +- .../llvm-exegesis/lib/SnippetGenerator.cpp | 5 +- .../llvm-exegesis/X86/SnippetFileTest.cpp | 4 +- 9 files changed, 49 insertions(+), 54 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test index 0403af4a229a5..6fa4621b1b5f8 100644 --- a/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test +++ 
b/llvm/test/tools/llvm-exegesis/X86/analysis-cluster-stabilization-config.test @@ -4,8 +4,7 @@ # have different configs, so they should not be placed in the same cluster by # stabilization. -# CHECK-UNSTABLE: SQRTSSr -# CHECK-UNSTABLE: SQRTSSr +# CHECK-UNSTABLE-NOT: SQRTSSr --- mode: latency diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h index 1976004c251a3..7dceb25b50762 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h @@ -9,7 +9,7 @@ #ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H #define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKCODE_H -#include "RegisterValue.h" +#include "BenchmarkResult.h" #include "llvm/MC/MCInst.h" #include #include @@ -19,12 +19,7 @@ namespace exegesis { // A collection of instructions that are to be assembled, executed and measured. struct BenchmarkCode { - // The sequence of instructions that are to be repeated. - std::vector Instructions; - - // Before the code is executed some instructions are added to setup the - // registers initial values. - std::vector RegisterInitialValues; + InstructionBenchmarkKey Key; // We also need to provide the registers that are live on entry for the // assembler to generate proper prologue/epilogue. 
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h index 17ffd0a8c8703..132dc36622a88 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -15,8 +15,8 @@ #ifndef LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H #define LLVM_TOOLS_LLVM_EXEGESIS_BENCHMARKRESULT_H -#include "BenchmarkCode.h" #include "LlvmState.h" +#include "RegisterValue.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInst.h" diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 4b541f4d829de..da26bc458dcf2 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -31,7 +31,6 @@ BenchmarkRunner::BenchmarkRunner(const LLVMState &State, BenchmarkRunner::~BenchmarkRunner() = default; - namespace { class FunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { public: @@ -92,10 +91,9 @@ InstructionBenchmark BenchmarkRunner::runConfiguration( InstrBenchmark.NumRepetitions = NumRepetitions; InstrBenchmark.Info = BC.Info; - const std::vector &Instructions = BC.Instructions; + const std::vector &Instructions = BC.Key.Instructions; - InstrBenchmark.Key.Instructions = Instructions; - InstrBenchmark.Key.RegisterInitialValues = BC.RegisterInitialValues; + InstrBenchmark.Key = BC.Key; // Assemble at least kMinInstructionsForSnippet instructions by repeating the // snippet for debug/analysis. 
This is so that the user clearly understands @@ -104,10 +102,10 @@ InstructionBenchmark BenchmarkRunner::runConfiguration( { llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); - assembleToStream( - State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, - BC.RegisterInitialValues, - Repetitor.Repeat(BC.Instructions, kMinInstructionsForSnippet), OS); + assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), + BC.LiveIns, BC.Key.RegisterInitialValues, + Repetitor.Repeat(Instructions, kMinInstructionsForSnippet), + OS); const ExecutableFunction EF(State.createTargetMachine(), getObjectFromBuffer(OS.str())); const auto FnBytes = EF.getFunctionBytes(); @@ -117,7 +115,7 @@ InstructionBenchmark BenchmarkRunner::runConfiguration( // Assemble NumRepetitions instructions repetitions of the snippet for // measurements. const auto Filler = - Repetitor.Repeat(BC.Instructions, InstrBenchmark.NumRepetitions); + Repetitor.Repeat(Instructions, InstrBenchmark.NumRepetitions); llvm::object::OwningBinary ObjectFile; if (DumpObjectToDisk) { @@ -133,7 +131,7 @@ InstructionBenchmark BenchmarkRunner::runConfiguration( llvm::SmallString<0> Buffer; llvm::raw_svector_ostream OS(Buffer); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, Filler, OS); + BC.LiveIns, BC.Key.RegisterInitialValues, Filler, OS); ObjectFile = getObjectFromBuffer(OS.str()); } @@ -150,7 +148,7 @@ InstructionBenchmark BenchmarkRunner::runConfiguration( // Scale the measurements by instruction. BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; // Scale the measurements by snippet. 
- BM.PerSnippetValue *= static_cast(BC.Instructions.size()) / + BM.PerSnippetValue *= static_cast(Instructions.size()) / InstrBenchmark.NumRepetitions; } @@ -167,7 +165,7 @@ BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC, return std::move(E); llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, FillFunction, OFS); + BC.LiveIns, BC.Key.RegisterInitialValues, FillFunction, OFS); return ResultPath.str(); } diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp index 398bbf776af74..5df47933e7127 100644 --- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp +++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp @@ -237,39 +237,40 @@ void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) { // We shall find every opcode with benchmarks not in just one cluster, and move // *all* the benchmarks of said Opcode into one new unstable cluster per Opcode. void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { - // Given an instruction Opcode, in which clusters do benchmarks of this - // instruction lie? Normally, they all should be in the same cluster. - std::vector> OpcodeToClusterIDs; - OpcodeToClusterIDs.resize(NumOpcodes); - // The list of opcodes that have more than one cluster. - llvm::SetVector UnstableOpcodes; - // Populate OpcodeToClusterIDs and UnstableOpcodes data structures. + // Given an instruction Opcode and Config, in which clusters do benchmarks of + // this instruction lie? Normally, they all should be in the same cluster. 
+ struct OpcodeAndConfig { + explicit OpcodeAndConfig(const InstructionBenchmark &IB) + : Opcode(IB.keyInstruction().getOpcode()), Config(&IB.Key.Config) {} + unsigned Opcode; + const std::string *Config; + + auto Tie() const -> auto { return std::tie(Opcode, *Config); } + + bool operator<(const OpcodeAndConfig &O) const { return Tie() < O.Tie(); } + bool operator!=(const OpcodeAndConfig &O) const { return Tie() != O.Tie(); } + }; + std::map> + OpcodeConfigToClusterIDs; + // Populate OpcodeConfigToClusterIDs and UnstableOpcodes data structures. assert(ClusterIdForPoint_.size() == Points_.size() && "size mismatch"); for (const auto &Point : zip(Points_, ClusterIdForPoint_)) { const ClusterId &ClusterIdOfPoint = std::get<1>(Point); if (!ClusterIdOfPoint.isValid()) continue; // Only process fully valid clusters. - const unsigned Opcode = std::get<0>(Point).keyInstruction().getOpcode(); - assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)"); + const OpcodeAndConfig Key(std::get<0>(Point)); llvm::SmallSet &ClusterIDsOfOpcode = - OpcodeToClusterIDs[Opcode]; + OpcodeConfigToClusterIDs[Key]; ClusterIDsOfOpcode.insert(ClusterIdOfPoint); - // Is there more than one ClusterID for this opcode?. - if (ClusterIDsOfOpcode.size() < 2) - continue; // If not, then at this moment this Opcode is stable. - // Else let's record this unstable opcode for future use. - UnstableOpcodes.insert(Opcode); } - assert(OpcodeToClusterIDs.size() == NumOpcodes && "sanity check"); - // We know with how many [new] clusters we will end up with. 
- const auto NewTotalClusterCount = Clusters_.size() + UnstableOpcodes.size(); - Clusters_.reserve(NewTotalClusterCount); - for (const size_t UnstableOpcode : UnstableOpcodes.getArrayRef()) { + for (const auto &OpcodeConfigToClusterID : OpcodeConfigToClusterIDs) { const llvm::SmallSet &ClusterIDs = - OpcodeToClusterIDs[UnstableOpcode]; - assert(ClusterIDs.size() > 1 && - "Should only have Opcodes with more than one cluster."); + OpcodeConfigToClusterID.second; + const OpcodeAndConfig &Key = OpcodeConfigToClusterID.first; + // We only care about unstable instructions. + if (ClusterIDs.size() < 2) + continue; // Create a new unstable cluster, one per Opcode. Clusters_.emplace_back(ClusterId::makeValidUnstable(Clusters_.size())); @@ -290,8 +291,8 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { // and the rest of the points is for the UnstableOpcode. const auto it = std::stable_partition( OldCluster.PointIndices.begin(), OldCluster.PointIndices.end(), - [this, UnstableOpcode](size_t P) { - return Points_[P].keyInstruction().getOpcode() != UnstableOpcode; + [this, &Key](size_t P) { + return OpcodeAndConfig(Points_[P]) != Key; }); assert(std::distance(it, OldCluster.PointIndices.end()) > 0 && "Should have found at least one bad point"); @@ -314,7 +315,6 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) { "New unstable cluster should end up with no less points than there " "was clusters"); } - assert(Clusters_.size() == NewTotalClusterCount && "sanity check"); } llvm::Expected diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h index 2edd514816957..d782296ed33d6 100644 --- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h @@ -115,6 +115,8 @@ struct CodeTemplate { CodeTemplate &operator=(const CodeTemplate &) = delete; ExecutionMode Execution = ExecutionMode::UNKNOWN; + // See InstructionBenchmarkKey.::Config. 
+ std::string Config; // Some information about how this template has been created. std::string Info; // The list of the instructions for this template. diff --git a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp index f5666ecab9548..63df5c634537b 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetFile.cpp @@ -36,7 +36,7 @@ class BenchmarkCodeStreamer : public MCStreamer, public AsmCommentConsumer { // instructions. void EmitInstruction(const MCInst &Instruction, const MCSubtargetInfo &STI) override { - Result->Instructions.push_back(Instruction); + Result->Key.Instructions.push_back(Instruction); } // Implementation of the AsmCommentConsumer. @@ -65,7 +65,7 @@ class BenchmarkCodeStreamer : public MCStreamer, public AsmCommentConsumer { const StringRef HexValue = Parts[1].trim(); RegVal.Value = APInt( /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16); - Result->RegisterInitialValues.push_back(std::move(RegVal)); + Result->Key.RegisterInitialValues.push_back(std::move(RegVal)); return; } if (CommentText.consume_front("LIVEIN")) { diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp index 267ab13163345..879962001e6bb 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -73,12 +73,13 @@ SnippetGenerator::generateConfigurations( BC.Info = CT.Info; for (InstructionTemplate &IT : CT.Instructions) { randomizeUnsetVariables(State.getExegesisTarget(), ForbiddenRegs, IT); - BC.Instructions.push_back(IT.build()); + BC.Key.Instructions.push_back(IT.build()); } if (CT.ScratchSpacePointerInReg) BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); - BC.RegisterInitialValues = + BC.Key.RegisterInitialValues = computeRegisterInitialValues(CT.Instructions); + BC.Key.Config = CT.Config; Output.push_back(std::move(BC)); } } diff --git 
a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp index 69dd689fc492d..04ba51cef2779 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp @@ -78,8 +78,8 @@ TEST_F(X86SnippetFileTest, Works) { EXPECT_FALSE((bool)Snippets.takeError()); ASSERT_THAT(*Snippets, SizeIs(1)); const auto &Snippet = (*Snippets)[0]; - ASSERT_THAT(Snippet.Instructions, ElementsAre(HasOpcode(X86::INC64r))); - ASSERT_THAT(Snippet.RegisterInitialValues, + ASSERT_THAT(Snippet.Key.Instructions, ElementsAre(HasOpcode(X86::INC64r))); + ASSERT_THAT(Snippet.Key.RegisterInitialValues, ElementsAre(RegisterInitialValueIs(X86::RAX, 15), RegisterInitialValueIs(X86::SIL, 0))); ASSERT_THAT(Snippet.LiveIns, ElementsAre(X86::RDI, X86::DL)); From fb190c82983589eedfde7d1424c350eeb3d00050 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 8 Oct 2019 09:17:46 +0000 Subject: [PATCH 225/254] Remove an useless allocation (from by clang-analyzer/scan-build) https://llvm.org/reports/scan-build/report-TargetInfo.cpp-detectFPCCEligibleStruct-9-1.html#EndPath llvm-svn: 374032 --- clang/lib/CodeGen/TargetInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index b281db332143d..c2c7b8bf653b9 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9438,7 +9438,7 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); // Not really a candidate if we have a single int but no float. 
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) - return IsCandidate = false; + return false; if (!IsCandidate) return false; if (Field1Ty && Field1Ty->isFloatingPointTy()) From 02682498b86a72a53415a3676042b1a7d30ccbdc Mon Sep 17 00:00:00 2001 From: Nikola Prica Date: Tue, 8 Oct 2019 09:43:05 +0000 Subject: [PATCH 226/254] [ISEL][ARM][AARCH64] Tracking simple parameter forwarding registers Support for tracking registers that forward function parameters into the following function frame. For now we only support cases when parameter is forwarded through single register. Reviewers: aprantl, vsk, t.p.northover Reviewed By: vsk Differential Revision: https://reviews.llvm.org/D66953 llvm-svn: 374033 --- .../Target/AArch64/AArch64ISelLowering.cpp | 17 +++++++- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 6 ++- llvm/lib/Target/ARM/ARMISelLowering.cpp | 9 +++- .../AArch64/call-site-info-output.ll | 41 +++++++++++++++++++ .../DebugInfo/ARM/call-site-info-output.ll | 41 +++++++++++++++++++ 5 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 llvm/test/DebugInfo/AArch64/call-site-info-output.ll create mode 100644 llvm/test/DebugInfo/ARM/call-site-info-output.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index bec14001ed840..c7302b45f6516 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3692,6 +3692,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool IsThisReturn = false; AArch64FunctionInfo *FuncInfo = MF.getInfo(); @@ -3889,9 +3890,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, }) ->second; Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + // Call site info is used for function's parameter entry value + // tracking. 
For now we track only simple cases when parameter + // is transferred through whole register. + CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(), + [&VA](MachineFunction::ArgRegPair ArgReg) { + return ArgReg.Reg == VA.getLocReg(); + }), + CSInfo.end()); } else { RegsToPass.emplace_back(VA.getLocReg(), Arg); RegsUsed.insert(VA.getLocReg()); + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); } } else { assert(VA.isMemLoc()); @@ -4072,12 +4084,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // actual call instruction. if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0; diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 73ebe0940f327..3724c0fd26580 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1205,8 +1205,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); - // Delete the pseudo instruction TCRETURN. + + // Update call site info and delete the pseudo instruction TCRETURN. 
+ MBB.getParent()->updateCallSiteInfo(&MI, &*NewMI); MBB.erase(MBBI); + MBBI = NewMI; return true; } @@ -1436,6 +1439,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.cloneMemRefs(MI); TransferImpOps(MI, MIB, MIB); + MI.getMF()->updateCallSiteInfo(&MI, &*MIB); MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index ec5537087985d..c1365f5893038 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2040,6 +2040,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); @@ -2164,6 +2165,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, "unexpected use of 'returned'"); isThisReturn = true; } + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -2399,12 +2403,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. 
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll new file mode 100644 index 0000000000000..d52d6962f3c41 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s +; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. +; Produced from: +; extern int fn1(int,int,int); +; int fn2(int a, int b, int c) { +; int local = fn1(a+b, c, 10); +; if (local > 10) +; return local + 10; +; return local; +; } +; clang -g -O2 -target aarch64-linux-gnu -S -emit-llvm %s +; CHECK: callSites: +; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: +; CHECK-NEXT: - { arg: 0, reg: '$w0' } +; CHECK-NEXT: - { arg: 1, reg: '$w1' } +; CHECK-NEXT: - { arg: 2, reg: '$w2' } } + +; ModuleID = 'call-site-info-output.c' +source_filename = "call-site-info-output.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local i32 @fn2(i32 %a, i32 %b, i32 %c) local_unnamed_addr{ +entry: + %add = add nsw i32 %b, %a + %call = tail call i32 @fn1(i32 %add, i32 %c, i32 10) + %cmp = icmp sgt i32 %call, 10 + %add1 = add nsw i32 %call, 10 + %retval.0 = select i1 %cmp, i32 %add1, i32 %call + ret i32 %retval.0 +} + +declare dso_local i32 @fn1(i32, i32, i32) local_unnamed_addr + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.ident = !{!0} + +!0 = !{!"clang version 10.0.0"} diff --git 
a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll new file mode 100644 index 0000000000000..9255a7d57dde9 --- /dev/null +++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s +; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. +; Produced from: +; extern int fn1(int,int,int); +; int fn2(int a, int b, int c) { +; int local = fn1(a+b, c, 10); +; if (local > 10) +; return local + 10; +; return local; +; } +; clang -g -O2 -target arm-linux-gnu -S -emit-llvm %s +; CHECK: callSites: +; CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: +; CHECK-NEXT: - { arg: 0, reg: '$r0' } +; CHECK-NEXT: - { arg: 1, reg: '$r1' } +; CHECK-NEXT: - { arg: 2, reg: '$r2' } } + +; ModuleID = 'call-site-info-output.c' +source_filename = "call-site-info-output.c" +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv4t-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local arm_aapcscc i32 @fn2(i32 %a, i32 %b, i32 %c) { +entry: + %add = add nsw i32 %b, %a + %call = tail call arm_aapcscc i32 @fn1(i32 %add, i32 %c, i32 10) + %cmp = icmp sgt i32 %call, 10 + %add1 = select i1 %cmp, i32 %c, i32 0 + %retval.0 = add nsw i32 %add1, %call + ret i32 %retval.0 +} + +declare dso_local arm_aapcscc i32 @fn1(i32, i32, i32) local_unnamed_addr + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.ident = !{!0} + +!0 = !{!"clang version 10.0.0"} From 8d6651f7b11ee3bc68d4e5523e44e987b53760bf Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 8 Oct 2019 10:46:01 +0000 Subject: [PATCH 227/254] [MCA][LSUnit] Track loads and stores until retirement. 
Before this patch, loads and stores were only tracked by their corresponding queues in the LSUnit from dispatch until execute stage. In practice we should be more conservative and assume that memory opcodes leave their queues at retirement stage. Basically, loads should leave the load queue only when they have completed and delivered their data. We conservatively assume that a load is completed when it is retired. Stores should be tracked by the store queue from dispatch until retirement. In practice, stores can only leave the store queue if their data can be written to the data cache. This is mostly a mechanical change. With this patch, the retire stage notifies the LSUnit when a memory instruction is retired. That would triggers the release of LDQ/STQ entries. The only visible change is in memory tests for the bdver2 model. That is because bdver2 is the only model that defines the load/store queue size. This patch partially addresses PR39830. Differential Revision: https://reviews.llvm.org/D68266 llvm-svn: 374034 --- llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 10 ++- llvm/include/llvm/MCA/Stages/RetireStage.h | 6 +- llvm/lib/MCA/Context.cpp | 2 +- llvm/lib/MCA/HardwareUnits/LSUnit.cpp | 16 ++-- llvm/lib/MCA/Stages/RetireStage.cpp | 4 + .../X86/BdVer2/load-store-throughput.s | 30 ++++---- .../llvm-mca/X86/BdVer2/load-throughput.s | 44 +++++------ .../llvm-mca/X86/BdVer2/store-throughput.s | 73 +++++++++---------- 8 files changed, 96 insertions(+), 89 deletions(-) diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index 0dd5d3322aa18..34903794db4a6 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -291,9 +291,14 @@ class LSUnitBase : public HardwareUnit { return NextGroupID++; } - // Instruction executed event handlers. 
virtual void onInstructionExecuted(const InstRef &IR); + // Loads are tracked by the LDQ (load queue) from dispatch until completion. + // Stores are tracked by the STQ (store queue) from dispatch until commitment. + // By default we conservatively assume that the LDQ receives a load at + // dispatch. Loads leave the LDQ at retirement stage. + virtual void onInstructionRetired(const InstRef &IR); + virtual void onInstructionIssued(const InstRef &IR) { unsigned GroupID = IR.getInstruction()->getLSUTokenID(); Groups[GroupID]->onInstructionIssued(IR); @@ -438,9 +443,6 @@ class LSUnit : public LSUnitBase { /// 6. A store has to wait until an older store barrier is fully executed. unsigned dispatch(const InstRef &IR) override; - // FIXME: For simplicity, we optimistically assume a similar behavior for - // store instructions. In practice, store operations don't tend to leave the - // store queue until they reach the 'Retired' stage (See PR39830). void onInstructionExecuted(const InstRef &IR) override; }; diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h index 08c216ac7bf43..f4713688d25f6 100644 --- a/llvm/include/llvm/MCA/Stages/RetireStage.h +++ b/llvm/include/llvm/MCA/Stages/RetireStage.h @@ -16,6 +16,7 @@ #ifndef LLVM_MCA_RETIRE_STAGE_H #define LLVM_MCA_RETIRE_STAGE_H +#include "llvm/MCA/HardwareUnits/LSUnit.h" #include "llvm/MCA/HardwareUnits/RegisterFile.h" #include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/MCA/Stages/Stage.h" @@ -27,13 +28,14 @@ class RetireStage final : public Stage { // Owner will go away when we move listeners/eventing to the stages. 
RetireControlUnit &RCU; RegisterFile &PRF; + LSUnitBase &LSU; RetireStage(const RetireStage &Other) = delete; RetireStage &operator=(const RetireStage &Other) = delete; public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} + RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) + : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp index 546c82c6dd987..0160e1f9f7874 100644 --- a/llvm/lib/MCA/Context.cpp +++ b/llvm/lib/MCA/Context.cpp @@ -44,7 +44,7 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) { *RCU, *PRF); auto Execute = std::make_unique(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = std::make_unique(*RCU, *PRF); + auto Retire = std::make_unique(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp index 973bb908e41a6..0ee084c7ce1a9 100644 --- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -160,17 +160,19 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { } void LSUnitBase::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Expected a memory operation!"); - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); It->second->onInstructionExecuted(); - if (It->second->isExecuted()) { + if (It->second->isExecuted()) Groups.erase(It); - } +} + +void LSUnitBase::onInstructionRetired(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsALoad 
= Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Expected a memory operation!"); if (IsALoad) { releaseLQSlot(); diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp index 735444525241a..f792af748bce9 100644 --- a/llvm/lib/MCA/Stages/RetireStage.cpp +++ b/llvm/lib/MCA/Stages/RetireStage.cpp @@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const { llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); + // Release the load/store queue entries. + if (Inst.isMemOp()) + LSU.onInstructionRetired(IR); + for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s index d9d6e9e18e530..4f53cce27e235 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -507,12 +507,12 @@ movaps %xmm3, (%rbx) # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 593 +# CHECK-NEXT: Total Cycles: 554 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.67 +# CHECK-NEXT: uOps Per Cycle: 0.72 +# CHECK-NEXT: IPC: 0.72 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -532,24 +532,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 187 (31.5%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 342 (57.7%) +# CHECK-NEXT: SQ - Store queue full: 437 (78.9%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # 
CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 403 (68.0%) -# CHECK-NEXT: 1, 90 (15.2%) -# CHECK-NEXT: 2, 2 (0.3%) -# CHECK-NEXT: 3, 86 (14.5%) -# CHECK-NEXT: 4, 12 (2.0%) +# CHECK-NEXT: 0, 365 (65.9%) +# CHECK-NEXT: 1, 88 (15.9%) +# CHECK-NEXT: 2, 3 (0.5%) +# CHECK-NEXT: 3, 86 (15.5%) +# CHECK-NEXT: 4, 12 (2.2%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 292 (49.2%) -# CHECK-NEXT: 1, 202 (34.1%) -# CHECK-NEXT: 2, 99 (16.7%) +# CHECK-NEXT: 0, 253 (45.7%) +# CHECK-NEXT: 1, 202 (36.5%) +# CHECK-NEXT: 2, 99 (17.9%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -595,8 +595,8 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax) -# CHECK-NEXT: 0.36 2.64 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 -# CHECK-NEXT: 2.64 0.36 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 +# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 +# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s index 6c9f15905c246..dfb45af19f3bc 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s @@ -80,7 +80,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # 
CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -102,9 +102,9 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -193,7 +193,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -215,9 +215,9 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -306,7 +306,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -328,9 +328,9 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -419,7 +419,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 353 (86.9%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -441,9 +441,9 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 32 36 40 +# CHECK-NEXT: PdEX 31 34 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: @@ -532,7 +532,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 532 (87.9%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -554,8 +554,8 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 34 38 40 -# CHECK-NEXT: PdFPU 34 38 64 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 # CHECK-NEXT: PdLoad 37 40 40 # CHECK-NEXT: PdStore 0 0 24 @@ -646,7 +646,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 532 (87.9%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -668,8 +668,8 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 34 38 40 -# CHECK-NEXT: PdFPU 34 38 64 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 # CHECK-NEXT: PdLoad 37 40 40 # CHECK-NEXT: PdStore 0 0 24 @@ -760,7 +760,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 344 (56.9%) +# CHECK-NEXT: LQ - Load queue full: 345 (57.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 @@ -781,9 +781,9 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 33 38 40 -# CHECK-NEXT: PdFPU 33 38 64 -# CHECK-NEXT: PdLoad 37 40 40 +# CHECK-NEXT: PdEX 33 36 40 +# CHECK-NEXT: PdFPU 33 36 64 +# CHECK-NEXT: PdLoad 36 40 40 # CHECK-NEXT: PdStore 0 0 24 # CHECK: Resources: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s index 4fc58a3827e70..b24272c4166ad 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -81,14 +81,13 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -103,10 +102,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -195,14 +194,13 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -217,10 +215,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -309,14 +307,13 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -331,10 +328,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -423,14 +420,13 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 370 (91.8%) +# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 25 (6.2%) -# CHECK-NEXT: 1, 370 (91.8%) -# CHECK-NEXT: 2, 1 (0.2%) +# CHECK-NEXT: 0, 24 (6.0%) +# CHECK-NEXT: 1, 372 (92.3%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: @@ -445,10 +441,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 +# CHECK-NEXT: PdEX 21 22 40 # CHECK-NEXT: PdFPU 0 0 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 23 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -537,7 +533,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 747 (93.0%) +# CHECK-NEXT: SQ - Store queue full: 748 (93.2%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: @@ -559,10 +555,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 23 40 -# CHECK-NEXT: PdFPU 22 23 64 +# CHECK-NEXT: PdEX 21 23 40 +# CHECK-NEXT: PdFPU 21 23 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 24 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -650,16 +646,17 @@ vmovaps %ymm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 185 (30.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 372 (61.8%) +# CHECK-NEXT: SQ - Store queue full: 559 (92.9%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 223 (37.0%) -# CHECK-NEXT: 1, 372 (61.8%) -# CHECK-NEXT: 4, 7 (1.2%) +# CHECK-NEXT: 0, 222 (36.9%) +# CHECK-NEXT: 1, 373 (62.0%) +# CHECK-NEXT: 3, 1 (0.2%) +# CHECK-NEXT: 4, 6 (1.0%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] @@ -673,10 +670,10 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. 
# CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 22 24 40 -# CHECK-NEXT: PdFPU 22 24 64 +# CHECK-NEXT: PdEX 21 23 40 +# CHECK-NEXT: PdFPU 21 23 64 # CHECK-NEXT: PdLoad 0 0 40 -# CHECK-NEXT: PdStore 23 24 24 +# CHECK-NEXT: PdStore 22 24 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -763,9 +760,9 @@ vmovaps %ymm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 5963 (83.2%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 5777 (80.6%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 374 (5.2%) +# CHECK-NEXT: SQ - Store queue full: 561 (7.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: From 67f542aba72f552c4833bb253761dbcc54071016 Mon Sep 17 00:00:00 2001 From: James Clarke Date: Tue, 8 Oct 2019 11:34:02 +0000 Subject: [PATCH 228/254] [Diagnostics] Silence -Wsizeof-array-div for character buffers Summary: Character buffers are sometimes used to represent a pool of memory that contains non-character objects, due to them being synonymous with a stream of bytes on almost all modern architectures. Often, when interacting with hardware devices, byte buffers are therefore used as an intermediary and so we can end Character buffers are sometimes used to represent a pool of memory that contains non-character objects, due to them being synonymous with a stream of bytes on almost all modern architectures. Often, when interacting with hardware devices, byte buffers are therefore used as an intermediary and so we can end up generating lots of false-positives. 
Moreover, due to the ability of character pointers to alias non-character pointers, the strict aliasing violations that would generally be implied by the calculations caught by the warning (if the calculation itself is in fact correct) do not apply here, and so although the length calculation may be wrong, that is the only possible issue. Reviewers: rsmith, xbolva00, thakis Reviewed By: xbolva00, thakis Subscribers: thakis, lebedev.ri, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68526 llvm-svn: 374035 --- clang/lib/Sema/SemaExpr.cpp | 1 + clang/test/Sema/div-sizeof-array.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index f08b616809464..d158eaabde9ea 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9197,6 +9197,7 @@ static void DiagnoseDivisionSizeofPointerOrArray(Sema &S, Expr *LHS, Expr *RHS, QualType ArrayElemTy = ArrayTy->getElementType(); if (ArrayElemTy != S.Context.getBaseElementType(ArrayTy) || ArrayElemTy->isDependentType() || RHSTy->isDependentType() || + ArrayElemTy->isCharType() || S.Context.getTypeSize(ArrayElemTy) == S.Context.getTypeSize(RHSTy)) return; S.Diag(Loc, diag::warn_division_sizeof_array) diff --git a/clang/test/Sema/div-sizeof-array.cpp b/clang/test/Sema/div-sizeof-array.cpp index 7c76a5265b85e..e295a9dec6d88 100644 --- a/clang/test/Sema/div-sizeof-array.cpp +++ b/clang/test/Sema/div-sizeof-array.cpp @@ -25,6 +25,8 @@ void test(void) { int a10 = sizeof(arr3) / sizeof(char); int a11 = sizeof(arr2) / (sizeof(unsigned)); int a12 = sizeof(arr) / (sizeof(short)); + int a13 = sizeof(arr3) / sizeof(p); + int a14 = sizeof(arr3) / sizeof(int); int arr4[10][12]; int b1 = sizeof(arr4) / sizeof(arr2[12]); From 537225a6a352f66f3b536b27359786dcca919a80 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 8 Oct 2019 11:54:42 +0000 Subject: [PATCH 229/254] [LoopRotate] Unconditionally get DomTree. 
LoopRotate is a loop pass and the DomTree should always be available. Similar to a70c5261436322a53187d67b8bdc0445d0463a9a llvm-svn: 374036 --- llvm/lib/Transforms/Scalar/LoopRotation.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 3585b71748a68..94517996df392 100644 --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -94,8 +94,7 @@ class LoopRotateLegacyPass : public LoopPass { auto *LI = &getAnalysis().getLoopInfo(); const auto *TTI = &getAnalysis().getTTI(F); auto *AC = &getAnalysis().getAssumptionCache(F); - auto *DTWP = getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto &DT = getAnalysis().getDomTree(); auto &SE = getAnalysis().getSE(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional MSSAU; @@ -103,7 +102,7 @@ class LoopRotateLegacyPass : public LoopPass { MemorySSA *MSSA = &getAnalysis().getMSSA(); MSSAU = MemorySSAUpdater(MSSA); } - return LoopRotation(L, LI, TTI, AC, DT, &SE, + return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, SQ, false, MaxHeaderSize, false); } From f34271d886901b5a96db8d8ac3341a8c59f1c141 Mon Sep 17 00:00:00 2001 From: Andrey Churbanov Date: Tue, 8 Oct 2019 12:23:25 +0000 Subject: [PATCH 230/254] Don't link libm with -Wl,--as-needed on FreeBSD Patch by jbeich (Jan Beich) Differential Revision: https://reviews.llvm.org/D68051 llvm-svn: 374037 --- openmp/runtime/cmake/LibompHandleFlags.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openmp/runtime/cmake/LibompHandleFlags.cmake b/openmp/runtime/cmake/LibompHandleFlags.cmake index 4ad8e260b2370..046c5d8137529 100644 --- a/openmp/runtime/cmake/LibompHandleFlags.cmake +++ b/openmp/runtime/cmake/LibompHandleFlags.cmake @@ -126,11 +126,11 @@ function(libomp_get_libflags libflags) if(${IA32}) libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY) endif() - if(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly") + if(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly|FreeBSD") libomp_append(libflags_local "-Wl,--no-as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG) libomp_append(libflags_local "-lm") libomp_append(libflags_local "-Wl,--as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG) - elseif(${CMAKE_SYSTEM_NAME} MATCHES "(Free|Net)BSD") + elseif(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD") libomp_append(libflags_local -lm) endif() set(libflags_local ${libflags_local} ${LIBOMP_LIBFLAGS}) From ca2973bb2024a72efd9bd583d6587bfff0a5545f Mon Sep 17 00:00:00 2001 From: Andrey Churbanov Date: Tue, 8 Oct 2019 12:39:04 +0000 Subject: [PATCH 231/254] Don't assume Type from `readelf -d` has parentheses Patch by jbeich (Jan Beich) Differential Revision: https://reviews.llvm.org/D68053 llvm-svn: 374038 --- openmp/runtime/tools/check-depends.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/tools/check-depends.pl b/openmp/runtime/tools/check-depends.pl index 168c0cd422462..6f8c8af5a222d 100755 --- a/openmp/runtime/tools/check-depends.pl +++ b/openmp/runtime/tools/check-depends.pl @@ 
-131,7 +131,7 @@ ($) # Parse body. while ( $i < @bulk ) { my $line = $bulk[ $i ]; - if ( $line !~ m{^\s*0x[0-9a-f]+\s+\(([_A-Z0-9]+)\)\s+(.*)\s*$}i ) { + if ( $line !~ m{^\s*0x[0-9a-f]+\s+\(?([_A-Z0-9]+)\)?\s+(.*)\s*$}i ) { parse_error( $tool, @bulk, $i ); }; # if my ( $type, $value ) = ( $1, $2 ); From 5cce533525d6d5174da796622a79bada6b48bba3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 8 Oct 2019 12:43:46 +0000 Subject: [PATCH 232/254] [SLP] add test with prefer-vector-width function attribute; NFC llvm-svn: 374039 --- .../Transforms/SLPVectorizer/X86/pr19657.ll | 104 ++++++++++++------ 1 file changed, 73 insertions(+), 31 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll index 2bde319c175c6..39db608fc9b41 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll @@ -1,40 +1,40 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s -; RUN: opt < %s -basicaa -slp-vectorizer -slp-max-reg-size=128 -S -mcpu=corei7-avx | FileCheck %s --check-prefix=V128 +; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s --check-prefixes=ANY,AVX +; RUN: opt < %s -basicaa -slp-vectorizer -slp-max-reg-size=128 -S -mcpu=corei7-avx | FileCheck %s --check-prefixes=ANY,MAX128 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo(double* %x) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[X]] to <4 x double>* -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[X]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[TMP8]], align 8 -; CHECK-NEXT: ret void +define void @store_chains(double* %x) { +; AVX-LABEL: @store_chains( +; AVX-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 +; AVX-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 +; AVX-NEXT: [[TMP4:%.*]] = bitcast double* [[X]] to <4 x double>* +; AVX-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 +; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP5]] +; AVX-NEXT: [[TMP8:%.*]] = bitcast double* [[X]] to <4 x double>* +; AVX-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[TMP8]], align 8 +; AVX-NEXT: ret void ; -; V128-LABEL: @foo( -; V128-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 -; V128-NEXT: [[TMP2:%.*]] = bitcast double* [[X]] to <2 x double>* -; V128-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; V128-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP3]] -; V128-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP3]] -; V128-NEXT: [[TMP6:%.*]] = bitcast double* [[X]] to <2 x double>* -; V128-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 -; V128-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 -; V128-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 -; V128-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; V128-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 -; V128-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], [[TMP10]] -; V128-NEXT: [[TMP12:%.*]] = 
fadd <2 x double> [[TMP11]], [[TMP10]] -; V128-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP7]] to <2 x double>* -; V128-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 -; V128-NEXT: ret void +; MAX128-LABEL: @store_chains( +; MAX128-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 +; MAX128-NEXT: [[TMP2:%.*]] = bitcast double* [[X]] to <2 x double>* +; MAX128-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 +; MAX128-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP3]] +; MAX128-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP3]] +; MAX128-NEXT: [[TMP6:%.*]] = bitcast double* [[X]] to <2 x double>* +; MAX128-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; MAX128-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 +; MAX128-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 +; MAX128-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP7]] to <2 x double>* +; MAX128-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 +; MAX128-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], [[TMP10]] +; MAX128-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], [[TMP10]] +; MAX128-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP7]] to <2 x double>* +; MAX128-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 +; MAX128-NEXT: ret void ; %1 = load double, double* %x, align 8 %2 = fadd double %1, %1 @@ -58,3 +58,45 @@ define void @foo(double* %x) { ret void } +define void @store_chains_prefer_width_attr(double* %x) #0 { +; ANY-LABEL: @store_chains_prefer_width_attr( +; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 1 +; ANY-NEXT: [[TMP2:%.*]] = bitcast double* [[X]] to <2 x double>* +; ANY-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 +; ANY-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP3]] +; ANY-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], 
[[TMP3]] +; ANY-NEXT: [[TMP6:%.*]] = bitcast double* [[X]] to <2 x double>* +; ANY-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; ANY-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, double* [[X]], i64 2 +; ANY-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[X]], i64 3 +; ANY-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP7]] to <2 x double>* +; ANY-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8 +; ANY-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], [[TMP10]] +; ANY-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], [[TMP10]] +; ANY-NEXT: [[TMP13:%.*]] = bitcast double* [[TMP7]] to <2 x double>* +; ANY-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8 +; ANY-NEXT: ret void +; + %1 = load double, double* %x, align 8 + %2 = fadd double %1, %1 + %3 = fadd double %2, %1 + store double %3, double* %x, align 8 + %4 = getelementptr inbounds double, double* %x, i64 1 + %5 = load double, double* %4, align 8 + %6 = fadd double %5, %5 + %7 = fadd double %6, %5 + store double %7, double* %4, align 8 + %8 = getelementptr inbounds double, double* %x, i64 2 + %9 = load double, double* %8, align 8 + %10 = fadd double %9, %9 + %11 = fadd double %10, %9 + store double %11, double* %8, align 8 + %12 = getelementptr inbounds double, double* %x, i64 3 + %13 = load double, double* %12, align 8 + %14 = fadd double %13, %13 + %15 = fadd double %14, %13 + store double %15, double* %12, align 8 + ret void +} + +attributes #0 = { "prefer-vector-width"="128" } From 7febdb7f27dfd3c5aab75ee07e1d36d30124fecc Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Tue, 8 Oct 2019 12:46:20 +0000 Subject: [PATCH 233/254] MachineSSAUpdater: insert IMPLICIT_DEF at top of basic block Summary: When getValueInMiddleOfBlock happens to be called for a basic block that has no incoming value at all, an IMPLICIT_DEF is inserted in that block via GetValueAtEndOfBlockInternal. 
This IMPLICIT_DEF must be at the top of its basic block or it will likely not reach the use that the caller intends to insert. Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204 Reviewers: arsenm, rampitec Subscribers: jvesely, wdng, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68183 llvm-svn: 374040 --- llvm/lib/CodeGen/MachineSSAUpdater.cpp | 2 +- llvm/test/CodeGen/AMDGPU/si-i1-copies.mir | 28 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-i1-copies.mir diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 3370b52ede40b..258a5f9e04820 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -292,7 +292,7 @@ class SSAUpdaterTraits { MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstTerminator(), + BB, BB->getFirstNonPHI(), Updater->VRC, Updater->MRI, Updater->TII); return NewDef->getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir new file mode 100644 index 0000000000000..2cb854b918ed9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=si-i1-copies -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s + +# Test that the new IMPLICIT_DEF is inserted in the correct location. 
+--- +name: test_undef +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: test_undef + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: S_BRANCH %bb.1 + ; GCN: bb.1: + ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[DEF]] + ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec + bb.0: + successors: %bb.1 + + %0:vreg_1 = IMPLICIT_DEF + S_BRANCH %bb.1 + + bb.1: + %1:vreg_1 = PHI %0, %bb.0 + %2:sreg_64_xexec = COPY %1 + %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2, implicit $exec + +... From df6e67697bfbfe364c65d75b5c01279dacc43aad Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Tue, 8 Oct 2019 12:46:32 +0000 Subject: [PATCH 234/254] AMDGPU: Propagate undef flag during pre-RA exec mask optimizations Summary: Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204 Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68184 llvm-svn: 374041 --- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 13 +++++----- .../AMDGPU/optimize-exec-masking-pre-ra.mir | 25 ++++++++++++++++++- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 681c3b35f75de..fdd30db6a7cb5 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, Op1->getImm() != 0 || Op2->getImm() != 1) return AMDGPU::NoRegister; - LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' - << *Cmp << '\t' << *And); + LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' + << *And); Register CCReg = CC->getReg(); LIS->RemoveMachineInstrFromMaps(*And); - MachineInstr *Andn2 = 
BuildMI(MBB, *And, And->getDebugLoc(), - TII->get(Andn2Opc), And->getOperand(0).getReg()) - .addReg(ExecReg) - .addReg(CCReg, 0, CC->getSubReg()); + MachineInstr *Andn2 = + BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), + And->getOperand(0).getReg()) + .addReg(ExecReg) + .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); And->eraseFromParent(); LIS->InsertMachineInstrInMaps(*Andn2); diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir index a2fec4d298b11..0ea085afc4051 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # Check for regression from assuming an instruction was a copy after # dropping the opcode check. @@ -95,3 +95,26 @@ body: | $exec = S_OR_B64 $exec, %7, implicit-def $scc ... + +# When folding a v_cndmask and a v_cmp in a pattern leading to +# s_cbranch_vccz, ensure that an undef operand is handled correctly. +--- +name: cndmask_cmp_cbranch_fold_undef +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: cndmask_cmp_cbranch_fold_undef + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def $scc + ; GCN: S_CBRANCH_VCCZ %bb.1, implicit $vcc + ; GCN: bb.1: + bb.0: + + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0:sreg_64_xexec, implicit $exec + V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + + bb.1: + +... 
From b302561b763a1d2eb1a450e135b8d49931936755 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Tue, 8 Oct 2019 12:53:54 +0000 Subject: [PATCH 235/254] [SVE][IR] Scalable Vector size queries and IR instruction support * Adds a TypeSize struct to represent the known minimum size of a type along with a flag to indicate that the runtime size is a integer multiple of that size * Converts existing size query functions from Type.h and DataLayout.h to return a TypeSize result * Adds convenience methods (including a transparent conversion operator to uint64_t) so that most existing code 'just works' as if the return values were still scalars. * Uses the new size queries along with ElementCount to ensure that all supported instructions used with scalable vectors can be constructed in IR. Reviewers: hfinkel, lattner, rkruppe, greened, rovka, rengolin, sdesmalen Reviewed By: rovka, sdesmalen Differential Revision: https://reviews.llvm.org/D53137 llvm-svn: 374042 --- clang/lib/CodeGen/CGCall.cpp | 8 +- clang/lib/CodeGen/CGStmt.cpp | 12 +- clang/lib/CodeGen/CodeGenFunction.cpp | 8 +- llvm/include/llvm/ADT/DenseMapInfo.h | 2 +- llvm/include/llvm/IR/DataLayout.h | 55 ++- llvm/include/llvm/IR/DerivedTypes.h | 2 +- llvm/include/llvm/IR/InstrTypes.h | 2 +- llvm/include/llvm/IR/Type.h | 6 +- llvm/include/llvm/Support/MachineValueType.h | 2 +- llvm/include/llvm/Support/ScalableSize.h | 46 -- llvm/include/llvm/Support/TypeSize.h | 200 +++++++++ llvm/lib/Analysis/InlineCost.cpp | 6 +- llvm/lib/CodeGen/Analysis.cpp | 3 +- llvm/lib/IR/DataLayout.cpp | 6 +- llvm/lib/IR/Instructions.cpp | 17 +- llvm/lib/IR/Type.cpp | 27 +- .../Target/AArch64/AArch64ISelLowering.cpp | 2 +- llvm/lib/Transforms/Scalar/SROA.cpp | 6 +- llvm/test/Other/scalable-vectors-core-ir.ll | 393 ++++++++++++++++++ .../CodeGen/ScalableVectorMVTsTest.cpp | 2 +- llvm/unittests/IR/VectorTypesTest.cpp | 116 +++++- 21 files changed, 811 insertions(+), 110 deletions(-) delete mode 100644 llvm/include/llvm/Support/ScalableSize.h 
create mode 100644 llvm/include/llvm/Support/TypeSize.h create mode 100644 llvm/test/Other/scalable-vectors-core-ir.ll diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e851d7bafd762..b016b46acfe5b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4277,8 +4277,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update the largest vector width if any arguments have vector types. for (unsigned i = 0; i < IRCallArgs.size(); ++i) { if (auto *VT = dyn_cast(IRCallArgs[i]->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } // Compute the calling convention and attributes. @@ -4361,8 +4361,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update largest vector width from the return type. if (auto *VT = dyn_cast(CI->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 707db04a8923e..c058b4b4ba803 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2073,8 +2073,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. 
if (auto *VT = dyn_cast(ResultRegTypes.back())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } else { ArgTypes.push_back(Dest.getAddress().getType()); Args.push_back(Dest.getPointer()); @@ -2098,8 +2098,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2185,8 +2185,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e934ab34a3a23..41b7f2f4b1be9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -431,13 +431,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Scan function arguments for vector width. for (llvm::Argument &A : CurFn->args()) if (auto *VT = dyn_cast(A.getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Update vector width based on return type. 
if (auto *VT = dyn_cast(CurFn->getReturnType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Add the required-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index 5ef6f3ad1b046..c4e8f2d4abc9b 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -17,7 +17,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include #include diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index b33cb497d6e40..022d2e944b5af 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -30,6 +30,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Alignment.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -437,23 +438,33 @@ class DataLayout { /// Returns the number of bits necessary to hold the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must /// have a size (Type::isSized() must return true). - uint64_t getTypeSizeInBits(Type *Ty) const; + TypeSize getTypeSizeInBits(Type *Ty) const; /// Returns the maximum number of bytes that may be overwritten by /// storing the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. 
+ /// /// For example, returns 5 for i36 and 10 for x86_fp80. - uint64_t getTypeStoreSize(Type *Ty) const { - return (getTypeSizeInBits(Ty) + 7) / 8; + TypeSize getTypeStoreSize(Type *Ty) const { + auto BaseSize = getTypeSizeInBits(Ty); + return { (BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable() }; } /// Returns the maximum number of bits that may be overwritten by /// storing the specified type; always a multiple of 8. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 40 for i36 and 80 for x86_fp80. - uint64_t getTypeStoreSizeInBits(Type *Ty) const { + TypeSize getTypeStoreSizeInBits(Type *Ty) const { return 8 * getTypeStoreSize(Ty); } @@ -468,9 +479,12 @@ class DataLayout { /// Returns the offset in bytes between successive objects of the /// specified type, including alignment padding. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 12 or 16 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSize(Type *Ty) const { + TypeSize getTypeAllocSize(Type *Ty) const { // Round up to the next alignment boundary. return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); } @@ -478,9 +492,12 @@ class DataLayout { /// Returns the offset in bits between successive objects of the /// specified type, including alignment padding; always a multiple of 8. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 96 or 128 for x86_fp80, depending on alignment. 
- uint64_t getTypeAllocSizeInBits(Type *Ty) const { + TypeSize getTypeAllocSizeInBits(Type *Ty) const { return 8 * getTypeAllocSize(Ty); } @@ -598,13 +615,13 @@ class StructLayout { // The implementation of this method is provided inline as it is particularly // well suited to constant folding when called on a specific Type subclass. -inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { +inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { case Type::LabelTyID: - return getPointerSizeInBits(0); + return TypeSize::Fixed(getPointerSizeInBits(0)); case Type::PointerTyID: - return getPointerSizeInBits(Ty->getPointerAddressSpace()); + return TypeSize::Fixed(getPointerSizeInBits(Ty->getPointerAddressSpace())); case Type::ArrayTyID: { ArrayType *ATy = cast(Ty); return ATy->getNumElements() * @@ -612,26 +629,30 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { } case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. - return getStructLayout(cast(Ty))->getSizeInBits(); + return TypeSize::Fixed( + getStructLayout(cast(Ty))->getSizeInBits()); case Type::IntegerTyID: - return Ty->getIntegerBitWidth(); + return TypeSize::Fixed(Ty->getIntegerBitWidth()); case Type::HalfTyID: - return 16; + return TypeSize::Fixed(16); case Type::FloatTyID: - return 32; + return TypeSize::Fixed(32); case Type::DoubleTyID: case Type::X86_MMXTyID: - return 64; + return TypeSize::Fixed(64); case Type::PPC_FP128TyID: case Type::FP128TyID: - return 128; + return TypeSize::Fixed(128); // In memory objects this is always aligned to a higher boundary, but // only 80 bits contain information. 
case Type::X86_FP80TyID: - return 80; + return TypeSize::Fixed(80); case Type::VectorTyID: { VectorType *VTy = cast(Ty); - return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType()); + auto EltCnt = VTy->getElementCount(); + uint64_t MinBits = EltCnt.Min * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.Scalable); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 4cac62150a2bd..ade63764ebb1f 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -23,7 +23,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index ca419b50da6b7..3c5051deebb90 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -975,7 +975,7 @@ class CmpInst : public Instruction { static Type* makeCmpResultType(Type* opnd_type) { if (VectorType* vt = dyn_cast(opnd_type)) { return VectorType::get(Type::getInt1Ty(opnd_type->getContext()), - vt->getNumElements()); + vt->getElementCount()); } return Type::getInt1Ty(opnd_type->getContext()); } diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 34271aae49c30..63bc884f0b7d4 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -21,6 +21,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -281,12 +282,15 @@ class Type { /// This will return zero if the type does not have a size or is not a /// primitive type. 
/// + /// If this is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// Note that this may not reflect the size of memory allocated for an /// instance of the type or the number of bytes that are written when an /// instance of the type is stored to memory. The DataLayout class provides /// additional query functions to provide this information. /// - unsigned getPrimitiveSizeInBits() const LLVM_READONLY; + TypeSize getPrimitiveSizeInBits() const LLVM_READONLY; /// If this is a vector type, return the getPrimitiveSizeInBits value for the /// element type. Otherwise return the getPrimitiveSizeInBits value for this diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index bc617e3b90e6f..7f9f0b85c55e1 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -17,7 +17,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include namespace llvm { diff --git a/llvm/include/llvm/Support/ScalableSize.h b/llvm/include/llvm/Support/ScalableSize.h deleted file mode 100644 index a057d5ea7ce59..0000000000000 --- a/llvm/include/llvm/Support/ScalableSize.h +++ /dev/null @@ -1,46 +0,0 @@ -//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides a struct that can be used to query the size of IR types -// which may be scalable vectors. 
It provides convenience operators so that -// it can be used in much the same way as a single scalar value. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_SCALABLESIZE_H -#define LLVM_SUPPORT_SCALABLESIZE_H - -namespace llvm { - -class ElementCount { -public: - unsigned Min; // Minimum number of vector elements. - bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - bool operator==(const ElementCount& RHS) const { - return Min == RHS.Min && Scalable == RHS.Scalable; - } - bool operator!=(const ElementCount& RHS) const { - return !(*this == RHS); - } -}; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_SCALABLESIZE_H diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h new file mode 100644 index 0000000000000..d93919acd9dc0 --- /dev/null +++ b/llvm/include/llvm/Support/TypeSize.h @@ -0,0 +1,200 @@ +//===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a struct that can be used to query the size of IR types +// which may be scalable vectors. It provides convenience operators so that +// it can be used in much the same way as a single scalar value. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_TYPESIZE_H +#define LLVM_SUPPORT_TYPESIZE_H + +#include + +namespace llvm { + +class ElementCount { +public: + unsigned Min; // Minimum number of vector elements. + bool Scalable; // If true, NumElements is a multiple of 'Min' determined + // at runtime rather than compile time. + + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + bool operator==(const ElementCount& RHS) const { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + bool operator!=(const ElementCount& RHS) const { + return !(*this == RHS); + } +}; + +// This class is used to represent the size of types. If the type is of fixed +// size, it will represent the exact size. If the type is a scalable vector, +// it will represent the known minimum size. +class TypeSize { + uint64_t MinSize; // The known minimum size. + bool IsScalable; // If true, then the runtime size is an integer multiple + // of MinSize. + +public: + constexpr TypeSize(uint64_t MinSize, bool Scalable) + : MinSize(MinSize), IsScalable(Scalable) {} + + static constexpr TypeSize Fixed(uint64_t Size) { + return TypeSize(Size, /*IsScalable=*/false); + } + + static constexpr TypeSize Scalable(uint64_t MinSize) { + return TypeSize(MinSize, /*IsScalable=*/true); + } + + // Scalable vector types with the same minimum size as a fixed size type are + // not guaranteed to be the same size at runtime, so they are never + // considered to be equal. 
+ friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) { + return std::tie(LHS.MinSize, LHS.IsScalable) == + std::tie(RHS.MinSize, RHS.IsScalable); + } + + friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) { + return !(LHS == RHS); + } + + // For many cases, size ordering between scalable and fixed size types cannot + // be determined at compile time, so such comparisons aren't allowed. + // + // e.g. could be bigger than <4 x i32> with a runtime + // vscale >= 5, equal sized with a vscale of 4, and smaller with + // a vscale <= 3. + // + // If the scalable flags match, just perform the requested comparison + // between the minimum sizes. + friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) { + assert(LHS.IsScalable == RHS.IsScalable && + "Ordering comparison of scalable and fixed types"); + + return LHS.MinSize < RHS.MinSize; + } + + friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) { + return RHS < LHS; + } + + friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) { + return !(RHS < LHS); + } + + friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) { + return !(LHS < RHS); + } + + // Convenience operators to obtain relative sizes independently of + // the scalable flag. + TypeSize operator*(unsigned RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(unsigned RHS) const { + return { MinSize / RHS, IsScalable }; + } + + // Return the minimum size with the assumption that the size is exact. + // Use in places where a scalable size doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors) + uint64_t getFixedSize() const { + assert(!IsScalable && "Request for a fixed size on a scalable object"); + return MinSize; + } + + // Return the known minimum size. 
Use in places where the scalable property + // doesn't matter (e.g. determining alignment) or in conjunction with the + // isScalable method below. + uint64_t getKnownMinSize() const { + return MinSize; + } + + // Return whether or not the size is scalable. + bool isScalable() const { + return IsScalable; + } + + // Casts to a uint64_t if this is a fixed-width size. + // + // NOTE: This interface is obsolete and will be removed in a future version + // of LLVM in favour of calling getFixedSize() directly + operator uint64_t() const { + return getFixedSize(); + } + + // Additional convenience operators needed to avoid ambiguous parses + // TODO: Make uint64_t the default operator? + TypeSize operator*(uint64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(uint64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } +}; + +/// Returns a TypeSize with a known minimum size that is the next integer +/// (mod 2**64) that is greater than or equal to \p Value and is a multiple +/// of \p Align. \p Align must be non-zero. 
+/// +/// Similar to the alignTo functions in MathExtras.h +inline TypeSize alignTo(TypeSize Size, uint64_t Align) { + assert(Align != 0u && "Align must be non-zero"); + return {(Size.getKnownMinSize() + Align - 1) / Align * Align, + Size.isScalable()}; +} + +} // end namespace llvm + +#endif // LLVM_SUPPORT_TypeSize_H diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 57dee459fc2cb..89811ec0e377a 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -436,7 +436,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (auto *AllocSize = dyn_cast_or_null(Size)) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); return Base::visitAlloca(I); } } @@ -444,7 +445,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); + AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); } // We will happily inline static alloca instructions. 
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 6c059665fca32..4f24f077d1209 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V, NoopInput = Op; } else if (isa(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { - DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + DataBits = std::min((uint64_t)DataBits, + I->getType()->getPrimitiveSizeInBits().getFixedSize()); NoopInput = Op; } else if (auto CS = ImmutableCallSite(I)) { const Value *ReturnedOp = CS.getReturnedArgOperand(); diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index b125d1550c697..5fe7a2e94b6a4 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -745,7 +746,10 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { llvm_unreachable("Bad type for getAlignment!!!"); } - return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty); + // If we're dealing with a scalable vector, we just need the known minimum + // size for determining alignment. If not, we'll get the exact size. 
+ return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty).getKnownMinSize(), + abi_or_pref, Ty); } unsigned DataLayout::getABITypeAlignment(Type *Ty) const { diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index de1317ea9d3fe..20331803f604e 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -1792,7 +1793,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, Instruction *InsertBefore) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -1809,7 +1810,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2982,8 +2983,8 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { } // Get the bit sizes, we'll need these - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + auto SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + auto DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Run through the possibilities ... 
if (DestTy->isIntegerTy()) { // Casting to integral @@ -3030,7 +3031,7 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { if (VectorType *SrcVecTy = dyn_cast(SrcTy)) { if (VectorType *DestVecTy = dyn_cast(DestTy)) { - if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { + if (SrcVecTy->getElementCount() == DestVecTy->getElementCount()) { // An element by element cast. Valid if casting the elements is valid. SrcTy = SrcVecTy->getElementType(); DestTy = DestVecTy->getElementType(); @@ -3044,12 +3045,12 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { } } - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + auto SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + auto DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Could still have vectors of pointers if the number of elements doesn't // match - if (SrcBits == 0 || DestBits == 0) + if (SrcBits.getKnownMinSize() == 0 || DestBits.getKnownMinSize() == 0) return false; if (SrcBits != DestBits) diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 8ece7f223dd2e..3eab5042b5424 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" #include #include @@ -111,18 +112,22 @@ bool Type::isEmptyTy() const { return false; } -unsigned Type::getPrimitiveSizeInBits() const { +TypeSize Type::getPrimitiveSizeInBits() const { switch (getTypeID()) { - case Type::HalfTyID: return 16; - case Type::FloatTyID: return 32; - case Type::DoubleTyID: return 64; - case Type::X86_FP80TyID: return 80; - case Type::FP128TyID: return 128; - case Type::PPC_FP128TyID: return 128; - case Type::X86_MMXTyID: return 64; - case Type::IntegerTyID: return cast(this)->getBitWidth(); - case Type::VectorTyID: return cast(this)->getBitWidth(); - default: 
return 0; + case Type::HalfTyID: return TypeSize::Fixed(16); + case Type::FloatTyID: return TypeSize::Fixed(32); + case Type::DoubleTyID: return TypeSize::Fixed(64); + case Type::X86_FP80TyID: return TypeSize::Fixed(80); + case Type::FP128TyID: return TypeSize::Fixed(128); + case Type::PPC_FP128TyID: return TypeSize::Fixed(128); + case Type::X86_MMXTyID: return TypeSize::Fixed(64); + case Type::IntegerTyID: + return TypeSize::Fixed(cast(this)->getBitWidth()); + case Type::VectorTyID: { + const VectorType *VTy = cast(this); + return TypeSize(VTy->getBitWidth(), VTy->isScalable()); + } + default: return TypeSize::Fixed(0); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c7302b45f6516..ee62b6dfe36b6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8526,7 +8526,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. 
if (ShiftAmt == 0 || ShiftAmt > 4) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index c1e935fda7f86..4b816832c31ec 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -959,14 +959,16 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { std::tie(UsedI, I) = Uses.pop_back_val(); if (LoadInst *LI = dyn_cast(I)) { - Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(LI->getType()).getFixedSize()); continue; } if (StoreInst *SI = dyn_cast(I)) { Value *Op = SI->getOperand(0); if (Op == UsedI) return SI; - Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(Op->getType()).getFixedSize()); continue; } diff --git a/llvm/test/Other/scalable-vectors-core-ir.ll b/llvm/test/Other/scalable-vectors-core-ir.ll new file mode 100644 index 0000000000000..60cbab3cdff60 --- /dev/null +++ b/llvm/test/Other/scalable-vectors-core-ir.ll @@ -0,0 +1,393 @@ +; RUN: opt -S -verify < %s | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +;; Check supported instructions are accepted without dropping 'vscale'. 
+;; Same order as the LangRef + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Unary Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +define @fneg( %val) { +; CHECK-LABEL: @fneg +; CHECK: %r = fneg %val +; CHECK-NEXT: ret %r + %r = fneg %val + ret %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Binary Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define @add( %a, %b) { +; CHECK-LABEL: @add +; CHECK: %r = add %a, %b +; CHECK-NEXT: ret %r + %r = add %a, %b + ret %r +} + +define @fadd( %a, %b) { +; CHECK-LABEL: @fadd +; CHECK: %r = fadd %a, %b +; CHECK-NEXT: ret %r + %r = fadd %a, %b + ret %r +} + +define @sub( %a, %b) { +; CHECK-LABEL: @sub +; CHECK: %r = sub %a, %b +; CHECK-NEXT: ret %r + %r = sub %a, %b + ret %r +} + +define @fsub( %a, %b) { +; CHECK-LABEL: @fsub +; CHECK: %r = fsub %a, %b +; CHECK-NEXT: ret %r + %r = fsub %a, %b + ret %r +} + +define @mul( %a, %b) { +; CHECK-LABEL: @mul +; CHECK: %r = mul %a, %b +; CHECK-NEXT: ret %r + %r = mul %a, %b + ret %r +} + +define @fmul( %a, %b) { +; CHECK-LABEL: @fmul +; CHECK: %r = fmul %a, %b +; CHECK-NEXT: ret %r + %r = fmul %a, %b + ret %r +} + +define @udiv( %a, %b) { +; CHECK-LABEL: @udiv +; CHECK: %r = udiv %a, %b +; CHECK-NEXT: ret %r + %r = udiv %a, %b + ret %r +} + +define @sdiv( %a, %b) { +; CHECK-LABEL: @sdiv +; CHECK: %r = sdiv %a, %b +; CHECK-NEXT: ret %r + %r = sdiv %a, %b + ret %r +} + +define @fdiv( %a, %b) { +; CHECK-LABEL: @fdiv +; CHECK: %r = fdiv %a, %b +; CHECK-NEXT: ret %r + %r = fdiv %a, %b + ret %r +} + +define @urem( %a, %b) { +; CHECK-LABEL: @urem +; CHECK: %r = urem %a, %b +; CHECK-NEXT: ret %r + %r = urem %a, %b + ret %r +} + +define @srem( %a, %b) { +; CHECK-LABEL: @srem +; CHECK: %r = srem %a, %b +; CHECK-NEXT: ret %r + %r = srem %a, %b + ret %r +} + +define @frem( %a, %b) { +; CHECK-LABEL: @frem +; CHECK: %r 
= frem %a, %b +; CHECK-NEXT: ret %r + %r = frem %a, %b + ret %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Bitwise Binary Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define @shl( %a, %b) { +; CHECK-LABEL: @shl +; CHECK: %r = shl %a, %b +; CHECK-NEXT: ret %r + %r = shl %a, %b + ret %r +} + +define @lshr( %a, %b) { +; CHECK-LABEL: @lshr +; CHECK: %r = lshr %a, %b +; CHECK-NEXT: ret %r + %r = lshr %a, %b + ret %r +} + +define @ashr( %a, %b) { +; CHECK-LABEL: @ashr +; CHECK: %r = ashr %a, %b +; CHECK-NEXT: ret %r + %r = ashr %a, %b + ret %r +} + +define @and( %a, %b) { +; CHECK-LABEL: @and +; CHECK: %r = and %a, %b +; CHECK-NEXT: ret %r + %r = and %a, %b + ret %r +} + +define @or( %a, %b) { +; CHECK-LABEL: @or +; CHECK: %r = or %a, %b +; CHECK-NEXT: ret %r + %r = or %a, %b + ret %r +} + +define @xor( %a, %b) { +; CHECK-LABEL: @xor +; CHECK: %r = xor %a, %b +; CHECK-NEXT: ret %r + %r = xor %a, %b + ret %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Vector Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i64 @extractelement( %val) { +; CHECK-LABEL: @extractelement +; CHECK: %r = extractelement %val, i32 0 +; CHECK-NEXT: ret i64 %r + %r = extractelement %val, i32 0 + ret i64 %r +} + +define @insertelement( %vec, i8 %ins) { +; CHECK-LABEL: @insertelement +; CHECK: %r = insertelement %vec, i8 %ins, i32 0 +; CHECK-NEXT: ret %r + %r = insertelement %vec, i8 %ins, i32 0 + ret %r +} + +define @shufflevector(half %val) { +; CHECK-LABEL: @shufflevector +; CHECK: %insvec = insertelement undef, half %val, i32 0 +; CHECK-NEXT: %r = shufflevector %insvec, undef, zeroinitializer +; CHECK-NEXT: ret %r + %insvec = insertelement undef, half %val, i32 0 + %r = shufflevector %insvec, undef, zeroinitializer + ret %r +} + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Memory Access and Addressing Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define void @alloca() { +; CHECK-LABEL: @alloca +; CHECK: %vec = alloca +; CHECK-NEXT: ret void + %vec = alloca + ret void +} + +define @load(* %ptr) { +; CHECK-LABEL: @load +; CHECK: %r = load , * %ptr +; CHECK-NEXT: ret %r + %r = load , * %ptr + ret %r +} + +define void @store( %data, * %ptr) { +; CHECK-LABEL: @store +; CHECK: store %data, * %ptr +; CHECK-NEXT: ret void + store %data, * %ptr + ret void +} + +define * @getelementptr(* %base) { +; CHECK-LABEL: @getelementptr +; CHECK: %r = getelementptr , * %base, i64 0 +; CHECK-NEXT: ret * %r + %r = getelementptr , * %base, i64 0 + ret * %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Conversion Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define @truncto( %val) { +; CHECK-LABEL: @truncto +; CHECK: %r = trunc %val to +; CHECK-NEXT: ret %r + %r = trunc %val to + ret %r +} + +define @zextto( %val) { +; CHECK-LABEL: @zextto +; CHECK: %r = zext %val to +; CHECK-NEXT: ret %r + %r = zext %val to + ret %r +} + +define @sextto( %val) { +; CHECK-LABEL: @sextto +; CHECK: %r = sext %val to +; CHECK-NEXT: ret %r + %r = sext %val to + ret %r +} + +define @fptruncto( %val) { +; CHECK-LABEL: @fptruncto +; CHECK: %r = fptrunc %val to +; CHECK-NEXT: ret %r + %r = fptrunc %val to + ret %r +} + +define @fpextto( %val) { +; CHECK-LABEL: @fpextto +; CHECK: %r = fpext %val to +; CHECK-NEXT: ret %r + %r = fpext %val to + ret %r +} + +define @fptouito( %val) { +; CHECK-LABEL: @fptoui +; CHECK: %r = fptoui %val to +; CHECK-NEXT: ret %r + %r = fptoui %val to + ret %r +} + +define @fptosito( %val) { +; CHECK-LABEL: @fptosi +; CHECK: %r = fptosi %val to +; CHECK-NEXT: ret %r + %r = fptosi %val to + ret %r +} + +define @uitofpto( %val) { +; 
CHECK-LABEL: @uitofp +; CHECK: %r = uitofp %val to +; CHECK-NEXT: ret %r + %r = uitofp %val to + ret %r +} + +define @sitofpto( %val) { +; CHECK-LABEL: @sitofp +; CHECK: %r = sitofp %val to +; CHECK-NEXT: ret %r + %r = sitofp %val to + ret %r +} + +define @ptrtointto( %val) { +; CHECK-LABEL: @ptrtointto +; CHECK: %r = ptrtoint %val to +; CHECK-NEXT: ret %r + %r = ptrtoint %val to + ret %r +} + +define @inttoptrto( %val) { +; CHECK-LABEL: @inttoptrto +; CHECK: %r = inttoptr %val to +; CHECK-NEXT: ret %r + %r = inttoptr %val to + ret %r +} + +define @bitcastto( %a) { +; CHECK-LABEL: @bitcast +; CHECK: %r = bitcast %a to +; CHECK-NEXT: ret %r + %r = bitcast %a to + ret %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Other Operations +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define @icmp( %a, %b) { +; CHECK-LABEL: @icmp +; CHECK: %r = icmp eq %a, %b +; CHECK-NEXT: ret %r + %r = icmp eq %a, %b + ret %r +} + +define @fcmp( %a, %b) { +; CHECK-LABEL: @fcmp +; CHECK: %r = fcmp une %a, %b +; CHECK-NEXT: ret %r + %r = fcmp une %a, %b + ret %r +} + +define @phi( %a, i32 %val) { +; CHECK-LABEL: @phi +; CHECK: %r = phi [ %a, %entry ], [ %added, %iszero ] +; CHECK-NEXT: ret %r +entry: + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %iszero, label %end + +iszero: + %ins = insertelement undef, i8 1, i32 0 + %splatone = shufflevector %ins, undef, zeroinitializer + %added = add %a, %splatone + br label %end + +end: + %r = phi [ %a, %entry ], [ %added, %iszero ] + ret %r +} + +define @select( %a, %b, %sval) { +; CHECK-LABEL: @select +; CHECK: %r = select %sval, %a, %b +; CHECK-NEXT: ret %r + %r = select %sval, %a, %b + ret %r +} + +declare @callee() +define @call( %val) { +; CHECK-LABEL: @call +; CHECK: %r = call @callee( %val) +; CHECK-NEXT: ret %r + %r = call @callee( %val) + ret %r +} \ No newline at end of file diff --git a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp 
b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp index fcb9848dcdcca..14a619653744e 100644 --- a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp +++ b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -10,7 +10,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include "gtest/gtest.h" using namespace llvm; diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp index f3caf6d69761f..606d0175360bf 100644 --- a/llvm/unittests/IR/VectorTypesTest.cpp +++ b/llvm/unittests/IR/VectorTypesTest.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include "gtest/gtest.h" using namespace llvm; @@ -161,4 +162,117 @@ TEST(VectorTypesTest, Scalable) { ASSERT_TRUE(EltCnt.Scalable); } +TEST(VectorTypesTest, FixedLenComparisons) { + LLVMContext Ctx; + DataLayout DL(""); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int64Ty = Type::getInt64Ty(Ctx); + + VectorType *V2Int32Ty = VectorType::get(Int32Ty, 2); + VectorType *V4Int32Ty = VectorType::get(Int32Ty, 4); + + VectorType *V2Int64Ty = VectorType::get(Int64Ty, 2); + + TypeSize V2I32Len = V2Int32Ty->getPrimitiveSizeInBits(); + EXPECT_EQ(V2I32Len.getKnownMinSize(), 64U); + EXPECT_FALSE(V2I32Len.isScalable()); + + EXPECT_LT(V2Int32Ty->getPrimitiveSizeInBits(), + V4Int32Ty->getPrimitiveSizeInBits()); + EXPECT_GT(V2Int64Ty->getPrimitiveSizeInBits(), + V2Int32Ty->getPrimitiveSizeInBits()); + EXPECT_EQ(V4Int32Ty->getPrimitiveSizeInBits(), + V2Int64Ty->getPrimitiveSizeInBits()); + EXPECT_NE(V2Int32Ty->getPrimitiveSizeInBits(), + V2Int64Ty->getPrimitiveSizeInBits()); + + // Check that a fixed-only comparison works for fixed size 
vectors. + EXPECT_EQ(V2Int64Ty->getPrimitiveSizeInBits().getFixedSize(), + V4Int32Ty->getPrimitiveSizeInBits().getFixedSize()); + + // Check the DataLayout interfaces. + EXPECT_EQ(DL.getTypeSizeInBits(V2Int64Ty), + DL.getTypeSizeInBits(V4Int32Ty)); + EXPECT_EQ(DL.getTypeSizeInBits(V2Int32Ty), 64U); + EXPECT_EQ(DL.getTypeSizeInBits(V2Int64Ty), 128U); + EXPECT_EQ(DL.getTypeStoreSize(V2Int64Ty), + DL.getTypeStoreSize(V4Int32Ty)); + EXPECT_NE(DL.getTypeStoreSizeInBits(V2Int32Ty), + DL.getTypeStoreSizeInBits(V2Int64Ty)); + EXPECT_EQ(DL.getTypeStoreSizeInBits(V2Int32Ty), 64U); + EXPECT_EQ(DL.getTypeStoreSize(V2Int64Ty), 16U); + EXPECT_EQ(DL.getTypeAllocSize(V4Int32Ty), + DL.getTypeAllocSize(V2Int64Ty)); + EXPECT_NE(DL.getTypeAllocSizeInBits(V2Int32Ty), + DL.getTypeAllocSizeInBits(V2Int64Ty)); + EXPECT_EQ(DL.getTypeAllocSizeInBits(V4Int32Ty), 128U); + EXPECT_EQ(DL.getTypeAllocSize(V2Int32Ty), 8U); + ASSERT_TRUE(DL.typeSizeEqualsStoreSize(V4Int32Ty)); +} + +TEST(VectorTypesTest, ScalableComparisons) { + LLVMContext Ctx; + DataLayout DL(""); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int64Ty = Type::getInt64Ty(Ctx); + + VectorType *ScV2Int32Ty = VectorType::get(Int32Ty, {2, true}); + VectorType *ScV4Int32Ty = VectorType::get(Int32Ty, {4, true}); + + VectorType *ScV2Int64Ty = VectorType::get(Int64Ty, {2, true}); + + TypeSize ScV2I32Len = ScV2Int32Ty->getPrimitiveSizeInBits(); + EXPECT_EQ(ScV2I32Len.getKnownMinSize(), 64U); + EXPECT_TRUE(ScV2I32Len.isScalable()); + + EXPECT_LT(ScV2Int32Ty->getPrimitiveSizeInBits(), + ScV4Int32Ty->getPrimitiveSizeInBits()); + EXPECT_GT(ScV2Int64Ty->getPrimitiveSizeInBits(), + ScV2Int32Ty->getPrimitiveSizeInBits()); + EXPECT_EQ(ScV4Int32Ty->getPrimitiveSizeInBits(), + ScV2Int64Ty->getPrimitiveSizeInBits()); + EXPECT_NE(ScV2Int32Ty->getPrimitiveSizeInBits(), + ScV2Int64Ty->getPrimitiveSizeInBits()); + + // Check the DataLayout interfaces. 
+ EXPECT_EQ(DL.getTypeSizeInBits(ScV2Int64Ty), + DL.getTypeSizeInBits(ScV4Int32Ty)); + EXPECT_EQ(DL.getTypeSizeInBits(ScV2Int32Ty).getKnownMinSize(), 64U); + EXPECT_EQ(DL.getTypeStoreSize(ScV2Int64Ty), + DL.getTypeStoreSize(ScV4Int32Ty)); + EXPECT_NE(DL.getTypeStoreSizeInBits(ScV2Int32Ty), + DL.getTypeStoreSizeInBits(ScV2Int64Ty)); + EXPECT_EQ(DL.getTypeStoreSizeInBits(ScV2Int32Ty).getKnownMinSize(), 64U); + EXPECT_EQ(DL.getTypeStoreSize(ScV2Int64Ty).getKnownMinSize(), 16U); + EXPECT_EQ(DL.getTypeAllocSize(ScV4Int32Ty), + DL.getTypeAllocSize(ScV2Int64Ty)); + EXPECT_NE(DL.getTypeAllocSizeInBits(ScV2Int32Ty), + DL.getTypeAllocSizeInBits(ScV2Int64Ty)); + EXPECT_EQ(DL.getTypeAllocSizeInBits(ScV4Int32Ty).getKnownMinSize(), 128U); + EXPECT_EQ(DL.getTypeAllocSize(ScV2Int32Ty).getKnownMinSize(), 8U); + ASSERT_TRUE(DL.typeSizeEqualsStoreSize(ScV4Int32Ty)); +} + +TEST(VectorTypesTest, CrossComparisons) { + LLVMContext Ctx; + + Type *Int32Ty = Type::getInt32Ty(Ctx); + + VectorType *V4Int32Ty = VectorType::get(Int32Ty, {4, false}); + VectorType *ScV4Int32Ty = VectorType::get(Int32Ty, {4, true}); + + // Even though the minimum size is the same, a scalable vector could be + // larger so we don't consider them to be the same size. + EXPECT_NE(V4Int32Ty->getPrimitiveSizeInBits(), + ScV4Int32Ty->getPrimitiveSizeInBits()); + // If we are only checking the minimum, then they are the same size. + EXPECT_EQ(V4Int32Ty->getPrimitiveSizeInBits().getKnownMinSize(), + ScV4Int32Ty->getPrimitiveSizeInBits().getKnownMinSize()); + + // We can't use ordering comparisons (<,<=,>,>=) between scalable and + // non-scalable vector sizes. +} + } // end anonymous namespace From aa53d6eb01a623254513c7f6ed61e0b3dc083ef4 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Tue, 8 Oct 2019 13:08:51 +0000 Subject: [PATCH 236/254] Add test for rotating truncated vectors. 
NFC llvm-svn: 374043 --- llvm/test/CodeGen/X86/rot16.ll | 13 +- llvm/test/CodeGen/X86/vector-rotate-128.ll | 143 +++++++++++++++++++++ 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll index 81838f84e3af4..5a1a8da3c677b 100644 --- a/llvm/test/CodeGen/X86/rot16.ll +++ b/llvm/test/CodeGen/X86/rot16.ll @@ -186,22 +186,21 @@ define i32 @rot16_demandedbits(i32 %x, i32 %y) nounwind { ; X32-NEXT: shrl $11, %ecx ; X32-NEXT: shll $5, %eax ; X32-NEXT: orl %ecx, %eax -; X32-NEXT: andl $65536, %eax # imm = 0x10000 +; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: retl ; ; X64-LABEL: rot16_demandedbits: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrl $11, %ecx -; X64-NEXT: shll $5, %eax -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: andl $65536, %eax # imm = 0x10000 +; X64-NEXT: shrl $11, %eax +; X64-NEXT: shll $5, %edi +; X64-NEXT: orl %eax, %edi +; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq %t0 = lshr i32 %x, 11 %t1 = shl i32 %x, 5 %t2 = or i32 %t0, %t1 - %t3 = and i32 %t2, 65536 + %t3 = and i32 %t2, 65535 ret i32 %t3 } diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index 3acdca7cda574..666325b35f782 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -2087,3 +2087,146 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind { %or = or <16 x i8> %lmask, %rmask ret <16 x i8> %or } + +define <4 x i32> @rot16_demandedbits(<4 x i32> %x, <4 x i32> %y) nounwind { +; X32-LABEL: rot16_demandedbits: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $11, %ecx +; X32-NEXT: shll $5, %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: andl $65536, %eax # imm = 0x10000 +; X32-NEXT: retl +; +; X64-LABEL: rot16_demandedbits: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: 
shrl $11, %ecx +; X64-NEXT: shll $5, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: andl $65536, %eax # imm = 0x10000 +; X64-NEXT: retq +; SSE2-LABEL: rot16_demandedbits: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrld $11, %xmm1 +; SSE2-NEXT: pslld $11, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: rot16_demandedbits: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $11, %xmm1 +; SSE41-NEXT: pslld $11, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; SSE41-NEXT: retq +; +; AVX-LABEL: rot16_demandedbits: +; AVX: # %bb.0: +; AVX-NEXT: vpsrld $11, %xmm0, %xmm1 +; AVX-NEXT: vpslld $11, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX-NEXT: retq +; +; AVX512-LABEL: rot16_demandedbits: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrld $11, %xmm0, %xmm1 +; AVX512-NEXT: vpslld $11, %xmm0, %xmm0 +; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX512-NEXT: retq +; +; XOP-LABEL: rot16_demandedbits: +; XOP: # %bb.0: +; XOP-NEXT: vpsrld $11, %xmm0, %xmm1 +; XOP-NEXT: vpslld $11, %xmm0, %xmm0 +; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0 +; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; XOP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; XOP-NEXT: retq +; +; X32-SSE-LABEL: rot16_demandedbits: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrld $11, %xmm1 +; X32-SSE-NEXT: pslld $11, %xmm0 +; X32-SSE-NEXT: por %xmm1, %xmm0 +; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 +; X32-SSE-NEXT: retl + %t0 = 
lshr <4 x i32> %x, + %t1 = shl <4 x i32> %x, + %t2 = or <4 x i32> %t0, %t1 + %t3 = and <4 x i32> %t2, + ret <4 x i32> %t3 +} + +define <4 x i16> @rot16_trunc(<4 x i32> %x, <4 x i32> %y) nounwind { +; SSE2-LABEL: rot16_trunc: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrld $11, %xmm1 +; SSE2-NEXT: pslld $5, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: retq +; +; SSE41-LABEL: rot16_trunc: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $11, %xmm1 +; SSE41-NEXT: pslld $5, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSE41-NEXT: retq +; +; AVX-LABEL: rot16_trunc: +; AVX: # %bb.0: +; AVX-NEXT: vpsrld $11, %xmm0, %xmm1 +; AVX-NEXT: vpslld $5, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX-NEXT: retq +; +; AVX512-LABEL: rot16_trunc: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrld $11, %xmm0, %xmm1 +; AVX512-NEXT: vpslld $5, %xmm0, %xmm0 +; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; AVX512-NEXT: retq +; +; XOP-LABEL: rot16_trunc: +; XOP: # %bb.0: +; XOP-NEXT: vpsrld $11, %xmm0, %xmm1 +; XOP-NEXT: vpslld $5, %xmm0, %xmm0 +; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0 +; XOP-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; XOP-NEXT: retq +; +; X32-SSE-LABEL: rot16_trunc: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrld $11, %xmm1 +; X32-SSE-NEXT: pslld $5, %xmm0 +; X32-SSE-NEXT: por %xmm1, %xmm0 +; X32-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; X32-SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; X32-SSE-NEXT: pshufd {{.*#+}} 
xmm0 = xmm0[0,2,2,3] +; X32-SSE-NEXT: retl + %t0 = lshr <4 x i32> %x, + %t1 = shl <4 x i32> %x, + %t2 = or <4 x i32> %t0, %t1 + %t3 = trunc <4 x i32> %t2 to <4 x i16> + ret <4 x i16> %t3 +} From d0d52edae92f5705450862561cecfc949d762481 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 8 Oct 2019 13:23:57 +0000 Subject: [PATCH 237/254] fix fmls fp16 Tim Northover remarked that the added patterns for fmls fp16 produce wrong code in case the fsub instruction has a multiplication as its first operand, i.e., all the patterns FMLSv*_OP1: > define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) { > ; CHECK-LABEL: test_FMLSv8f16_OP1: > ; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h > entry: > > %mul = fmul fast <8 x half> %c, %b > %sub = fsub fast <8 x half> %mul, %a > ret <8 x half> %sub > } > > This doesn't look right to me. The exact instruction produced is "fmls > v0.8h, v2.8h, v1.8h", which I think calculates "v0 - v2*v1", but the > IR is calculating "v2*v1-v0". The equivalent <4 x float> code also > doesn't emit an fmls. This patch generates an fmla and negates the value of the operand2 of the fsub. Inspecting the pattern match, I found that there was another mistake in the opcode to be selected: matching FMULv4*16 should generate FMLSv4*16 and not FMLSv2*32. Tested on aarch64-linux with make check-all. 
Differential Revision: https://reviews.llvm.org/D67990 llvm-svn: 374044 --- .../llvm/CodeGen/MachineCombinerPattern.h | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 53 ++++++++++++++----- llvm/test/CodeGen/AArch64/fp16-fmla.ll | 16 +++++- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 31056c8fdf0f1..503227222207f 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -80,6 +80,7 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, + FMLSv4f16_OP1, FMLSv4f16_OP2, FMLSv8f16_OP1, FMLSv8f16_OP2, @@ -87,6 +88,7 @@ enum class MachineCombinerPattern { FMLSv2f32_OP2, FMLSv2f64_OP1, FMLSv2f64_OP2, + FMLSv4i16_indexed_OP1, FMLSv4i16_indexed_OP2, FMLSv8i16_indexed_OP1, FMLSv8i16_indexed_OP2, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1cc3177b26a7f..57782862967d7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3806,8 +3806,8 @@ static bool getFMAPatterns(MachineInstr &Root, Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); - Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || - Match(AArch64::FMULv4f16, 1, MCP::FMLSv2f32_OP1); + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); break; case AArch64::FSUBv8f16: Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || @@ -3888,6 +3888,7 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLAv4f32_OP2: case MachineCombinerPattern::FMLAv4i32_indexed_OP1: case MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case 
MachineCombinerPattern::FMLSv4i16_indexed_OP1: case MachineCombinerPattern::FMLSv4i16_indexed_OP2: case MachineCombinerPattern::FMLSv8i16_indexed_OP1: case MachineCombinerPattern::FMLSv8i16_indexed_OP2: @@ -3895,6 +3896,7 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLSv1i64_indexed_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case MachineCombinerPattern::FMLSv4f16_OP1: case MachineCombinerPattern::FMLSv4f16_OP2: case MachineCombinerPattern::FMLSv8f16_OP1: case MachineCombinerPattern::FMLSv8f16_OP2: @@ -4497,6 +4499,26 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { + RC = &AArch64::FPR64RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } case MachineCombinerPattern::FMLSv4f16_OP2: RC = &AArch64::FPR64RegClass; Opc = AArch64::FMLSv4f16; @@ -4525,18 +4547,25 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { RC = &AArch64::FPR128RegClass; - Opc = AArch64::FMLSv8f16; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Accumulator); - break; - case MachineCombinerPattern::FMLSv8i16_indexed_OP1: - RC = &AArch64::FPR128RegClass; - Opc 
= AArch64::FMLSv8i16_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } break; - + } case MachineCombinerPattern::FMLSv8f16_OP2: RC = &AArch64::FPR128RegClass; Opc = AArch64::FMLSv8f16; diff --git a/llvm/test/CodeGen/AArch64/fp16-fmla.ll b/llvm/test/CodeGen/AArch64/fp16-fmla.ll index 08228e25d4aad..a81721afb8453 100644 --- a/llvm/test/CodeGen/AArch64/fp16-fmla.ll +++ b/llvm/test/CodeGen/AArch64/fp16-fmla.ll @@ -138,6 +138,16 @@ entry: ret <8 x half> %add } +define <4 x half> @test_FMLSv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_FMLSv4f16_OP1: +; CHECK: fneg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %mul = fmul fast <4 x half> %c, %b + %sub = fsub fast <4 x half> %mul, %a + ret <4 x half> %sub +} + define <4 x half> @test_FMLSv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) { ; CHECK-LABEL: test_FMLSv4f16_OP2: ; CHECK: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h @@ -149,7 +159,8 @@ entry: define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; CHECK-LABEL: test_FMLSv8f16_OP1: -; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %mul = fmul fast <8 x half> %c, %b 
%sub = fsub fast <8 x half> %mul, %a @@ -185,7 +196,8 @@ define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x ; CHECK: mul ; CHECK: fsub ; CHECK-FIXME: It should instead produce the following instruction: -; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-FIXME: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %mul = mul <8 x i16> %c, %b %m = bitcast <8 x i16> %mul to <8 x half> From 0929e5eca2cb2b9dcc854a3dad86273a6a6e6adc Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Tue, 8 Oct 2019 13:38:42 +0000 Subject: [PATCH 238/254] Restore documentation that 'svn update' unexpectedly yanked out from under me. llvm-svn: 374045 --- llvm/docs/ProgrammingDocumentation.rst | 54 +++++++ llvm/docs/SubsystemDocumentation.rst | 206 +++++++++++++++++++++++++ 2 files changed, 260 insertions(+) create mode 100644 llvm/docs/ProgrammingDocumentation.rst create mode 100644 llvm/docs/SubsystemDocumentation.rst diff --git a/llvm/docs/ProgrammingDocumentation.rst b/llvm/docs/ProgrammingDocumentation.rst new file mode 100644 index 0000000000000..fa8f114b22a30 --- /dev/null +++ b/llvm/docs/ProgrammingDocumentation.rst @@ -0,0 +1,54 @@ +Programming Documentation +========================= + +For developers of applications which use LLVM as a library. + +.. toctree:: + :hidden: + + Atomics + CommandLine + ExtendingLLVM + HowToSetUpLLVMStyleRTTI + ProgrammersManual + Extensions + LibFuzzer + FuzzingLLVM + ScudoHardenedAllocator + OptBisect + GwpAsan + +:doc:`Atomics` + Information about LLVM's concurrency model. + +:doc:`ProgrammersManual` + Introduction to the general layout of the LLVM sourcebase, important classes + and APIs, and some tips & tricks. + +:doc:`Extensions` + LLVM-specific extensions to tools and formats LLVM seeks compatibility with. + +:doc:`CommandLine` + Provides information on using the command line parsing library. 
+ +:doc:`HowToSetUpLLVMStyleRTTI` + How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your + class hierarchy. + +:doc:`ExtendingLLVM` + Look here to see how to add instructions and intrinsics to LLVM. + +:doc:`LibFuzzer` + A library for writing in-process guided fuzzers. + +:doc:`FuzzingLLVM` + Information on writing and using Fuzzers to find bugs in LLVM. + +:doc:`ScudoHardenedAllocator` + A library that implements a security-hardened `malloc()`. + +:doc:`OptBisect` + A command line option for debugging optimization-induced failures. + +:doc:`GwpAsan` + A sampled heap memory error detection toolkit designed for production use. \ No newline at end of file diff --git a/llvm/docs/SubsystemDocumentation.rst b/llvm/docs/SubsystemDocumentation.rst new file mode 100644 index 0000000000000..cac0ce85b1f5d --- /dev/null +++ b/llvm/docs/SubsystemDocumentation.rst @@ -0,0 +1,206 @@ +.. _index-subsystem-docs: + +Subsystem Documentation +======================= + +For API clients and LLVM developers. + +.. toctree:: + :hidden: + + AliasAnalysis + MemorySSA + BitCodeFormat + BlockFrequencyTerminology + BranchWeightMetadata + Bugpoint + CodeGenerator + ExceptionHandling + AddingConstrainedIntrinsics + LinkTimeOptimization + SegmentedStacks + TableGenFundamentals + TableGen/index + DebuggingJITedCode + GoldPlugin + MarkedUpDisassembly + SystemLibrary + SupportLibrary + SourceLevelDebugging + Vectorizers + WritingAnLLVMBackend + GarbageCollection + WritingAnLLVMPass + HowToUseAttributes + NVPTXUsage + AMDGPUUsage + StackMaps + InAlloca + BigEndianNEON + CoverageMappingFormat + Statepoints + MergeFunctions + TypeMetadata + TransformMetadata + FaultMaps + Coroutines + GlobalISel + XRay + XRayExample + XRayFDRFormat + PDB/index + CFIVerify + SpeculativeLoadHardening + StackSafetyAnalysis + LoopTerminology + DependenceGraphs/index + +:doc:`WritingAnLLVMPass` + Information on how to write LLVM transformations and analyses. 
+ +:doc:`WritingAnLLVMBackend` + Information on how to write LLVM backends for machine targets. + +:doc:`CodeGenerator` + The design and implementation of the LLVM code generator. Useful if you are + working on retargetting LLVM to a new architecture, designing a new codegen + pass, or enhancing existing components. + +:doc:`TableGen ` + Describes the TableGen tool, which is used heavily by the LLVM code + generator. + +:doc:`AliasAnalysis` + Information on how to write a new alias analysis implementation or how to + use existing analyses. + +:doc:`MemorySSA` + Information about the MemorySSA utility in LLVM, as well as how to use it. + +:doc:`GarbageCollection` + The interfaces source-language compilers should use for compiling GC'd + programs. + +:doc:`Source Level Debugging with LLVM ` + This document describes the design and philosophy behind the LLVM + source-level debugger. + +:doc:`Vectorizers` + This document describes the current status of vectorization in LLVM. + +:doc:`ExceptionHandling` + This document describes the design and implementation of exception handling + in LLVM. + +:doc:`AddingConstrainedIntrinsics` + Gives the steps necessary when adding a new constrained math intrinsic + to LLVM. + +:doc:`Bugpoint` + Automatic bug finder and test-case reducer description and usage + information. + +:doc:`BitCodeFormat` + This describes the file format and encoding used for LLVM "bc" files. + +:doc:`Support Library ` + This document describes the LLVM Support Library (``lib/Support``) and + how to keep LLVM source code portable + +:doc:`LinkTimeOptimization` + This document describes the interface between LLVM intermodular optimizer + and the linker and its design + +:doc:`GoldPlugin` + How to build your programs with link-time optimization on Linux. + +:doc:`DebuggingJITedCode` + How to debug JITed code with GDB. + +:doc:`MCJITDesignAndImplementation` + Describes the inner workings of MCJIT execution engine. 
+ +:doc:`ORCv2` + Describes the design and implementation of the ORC APIs, including some + usage examples, and a guide for users transitioning from ORCv1 to ORCv2. + +:doc:`BranchWeightMetadata` + Provides information about Branch Prediction Information. + +:doc:`BlockFrequencyTerminology` + Provides information about terminology used in the ``BlockFrequencyInfo`` + analysis pass. + +:doc:`SegmentedStacks` + This document describes segmented stacks and how they are used in LLVM. + +:doc:`MarkedUpDisassembly` + This document describes the optional rich disassembly output syntax. + +:doc:`HowToUseAttributes` + Answers some questions about the new Attributes infrastructure. + +:doc:`NVPTXUsage` + This document describes using the NVPTX backend to compile GPU kernels. + +:doc:`AMDGPUUsage` + This document describes using the AMDGPU backend to compile GPU kernels. + +:doc:`StackMaps` + LLVM support for mapping instruction addresses to the location of + values and allowing code to be patched. + +:doc:`BigEndianNEON` + LLVM's support for generating NEON instructions on big endian ARM targets is + somewhat nonintuitive. This document explains the implementation and rationale. + +:doc:`CoverageMappingFormat` + This describes the format and encoding used for LLVM’s code coverage mapping. + +:doc:`Statepoints` + This describes a set of experimental extensions for garbage + collection support. + +:doc:`MergeFunctions` + Describes functions merging optimization. + +:doc:`InAlloca` + Description of the ``inalloca`` argument attribute. + +:doc:`FaultMaps` + LLVM support for folding control flow into faulting machine instructions. + +:doc:`CompileCudaWithLLVM` + LLVM support for CUDA. + +:doc:`Coroutines` + LLVM support for coroutines. + +:doc:`GlobalISel` + This describes the prototype instruction selection replacement, GlobalISel. + +:doc:`XRay` + High-level documentation of how to use XRay in LLVM. + +:doc:`XRayExample` + An example of how to debug an application with XRay. 
+ +:doc:`The Microsoft PDB File Format ` + A detailed description of the Microsoft PDB (Program Database) file format. + +:doc:`CFIVerify` + A description of the verification tool for Control Flow Integrity. + +:doc:`SpeculativeLoadHardening` + A description of the Speculative Load Hardening mitigation for Spectre v1. + +:doc:`StackSafetyAnalysis` + This document describes the design of the stack safety analysis of local + variables. + +:doc:`LoopTerminology` + A document describing Loops and associated terms as used in LLVM. + +:doc:`Dependence Graphs ` + A description of the design of the various dependence graphs such as + the DDG (Data Dependence Graph). From 9a44ab77637da1e740b72a72765c4504e13d9331 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 8 Oct 2019 13:51:05 +0000 Subject: [PATCH 239/254] Tweak minidebuginfo-set-and-hit-breakpoint.test On my system, llvm-objcopy was refusing to remove the .dynsym section because it was still referenced from .rela.plt. Remove that section too, and clarify that this is needed only because llvm-objcopy --only-keep-debug does not work (does not set the sections to SHT_NOBITS). Also, ensure that the test is not creating temporary files in the source tree. llvm-svn: 374046 --- .../ELF/minidebuginfo-set-and-hit-breakpoint.test | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test b/lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test index 33188d2b4b749..c568a19642a07 100644 --- a/lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test +++ b/lldb/lit/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test @@ -23,9 +23,9 @@ # table. # IGNORE: comm -13 %t.dynsyms %t.funcsyms > %t.keep_symbols -# The result of the preceeding command can be preprocessed in %p/Inputs/minidebuginfo.keep_symbols +# The result of the preceeding command can be hardcoded # because we know what symbol to keep. 
-# RUN: echo "multiplyByFour" > %p/Inputs/minidebuginfo.keep_symbols +# RUN: echo "multiplyByFour" > %t.keep_symbols # Separate full debug info into debug binary. @@ -34,12 +34,15 @@ # Copy the full debuginfo, keeping only a minimal set of symbols and # removing some unnecessary sections. -# RUN: llvm-objcopy -S --remove-section .gdb_index --remove-section .comment --keep-symbols=%p/Inputs/minidebuginfo.keep_symbols %t.debug %t.mini_debuginfo +# RUN: llvm-objcopy -S --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.debug %t.mini_debuginfo # This command is not from the GDB manual but it slims down embedded minidebug # info. On top if that, it ensures that we only have the multiplyByThree symbol -# in the .dynsym section of the main binary. -# RUN: llvm-objcopy --remove-section=.rela.dyn --remove-section=.gnu.version --remove-section=.gnu.hash --remove-section=.dynsym %t.mini_debuginfo +# in the .dynsym section of the main binary. The bits removing .rela.plt, +# .rela.dyn and .dynsym sections can be removed once llvm-objcopy +# --only-keep-debug starts to work. +# RUN: llvm-objcopy --remove-section=.rela.plt --remove-section=.rela.dyn \ +# RUN: --remove-section=.gnu.version --remove-section=.gnu.hash --remove-section=.dynsym %t.mini_debuginfo # Drop the full debug info from the original binary. 
From 3c4646194ede95d26e30b24be9964ce45925a844 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 8 Oct 2019 13:54:03 +0000 Subject: [PATCH 240/254] [clangd] Bump timeouts in speculative completion tests llvm-svn: 374047 --- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 68d0a46760985..73bb1f97bda2f 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1137,7 +1137,7 @@ class IndexRequestCollector : public SymbolIndex { const std::vector consumeRequests(size_t Num) const { std::unique_lock Lock(Mut); - EXPECT_TRUE(wait(Lock, ReceivedRequestCV, timeoutSeconds(10), + EXPECT_TRUE(wait(Lock, ReceivedRequestCV, timeoutSeconds(30), [this, Num] { return Requests.size() == Num; })); auto Reqs = std::move(Requests); Requests = {}; From 1b36caf45e5e52b139818e24e07ee069e9c8e18a Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Tue, 8 Oct 2019 14:03:45 +0000 Subject: [PATCH 241/254] [clangd] Disable expand auto on decltype(auto) Summary: Applying it produces incorrect code at the moment. 
Reviewers: sammccall Reviewed By: sammccall Subscribers: kuhnel, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D68630 llvm-svn: 374048 --- clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp | 4 +++- clang-tools-extra/clangd/unittests/TweakTests.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp index eaab40b2407bc..76c14ae723d75 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp @@ -61,7 +61,9 @@ bool ExpandAutoType::prepare(const Selection& Inputs) { if (auto *Node = Inputs.ASTSelection.commonAncestor()) { if (auto *TypeNode = Node->ASTNode.get()) { if (const AutoTypeLoc Result = TypeNode->getAs()) { - CachedLocation = Result; + // Code in apply() does not handle 'decltype(auto)' yet. + if (!Result.getTypePtr()->isDecltypeAuto()) + CachedLocation = Result; } } } diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index 97cd4a2cafa66..8cc29dd182b14 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -528,6 +528,8 @@ TEST_F(ExpandAutoTypeTest, Test) { // replace array types EXPECT_EQ(apply(R"cpp(au^to x = "test")cpp"), R"cpp(const char * x = "test")cpp"); + + EXPECT_UNAVAILABLE("dec^ltype(au^to) x = 10;"); } TWEAK_TEST(ExtractFunction); From c91f1992a6cc13005550ae523aea2627e2ebd977 Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Tue, 8 Oct 2019 14:10:26 +0000 Subject: [PATCH 242/254] Nope, I'm wrong. It looks like someone else removed these on purpose and it just happened to break the bot right when I did my push. So I'm undoing this morning's incorrect push.
I've also kicked off an email to hopefully get the bot fixed the correct way. llvm-svn: 374049 --- llvm/docs/ProgrammingDocumentation.rst | 54 ------- llvm/docs/SubsystemDocumentation.rst | 206 ------------------------- 2 files changed, 260 deletions(-) delete mode 100644 llvm/docs/ProgrammingDocumentation.rst delete mode 100644 llvm/docs/SubsystemDocumentation.rst diff --git a/llvm/docs/ProgrammingDocumentation.rst b/llvm/docs/ProgrammingDocumentation.rst deleted file mode 100644 index fa8f114b22a30..0000000000000 --- a/llvm/docs/ProgrammingDocumentation.rst +++ /dev/null @@ -1,54 +0,0 @@ -Programming Documentation -========================= - -For developers of applications which use LLVM as a library. - -.. toctree:: - :hidden: - - Atomics - CommandLine - ExtendingLLVM - HowToSetUpLLVMStyleRTTI - ProgrammersManual - Extensions - LibFuzzer - FuzzingLLVM - ScudoHardenedAllocator - OptBisect - GwpAsan - -:doc:`Atomics` - Information about LLVM's concurrency model. - -:doc:`ProgrammersManual` - Introduction to the general layout of the LLVM sourcebase, important classes - and APIs, and some tips & tricks. - -:doc:`Extensions` - LLVM-specific extensions to tools and formats LLVM seeks compatibility with. - -:doc:`CommandLine` - Provides information on using the command line parsing library. - -:doc:`HowToSetUpLLVMStyleRTTI` - How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your - class hierarchy. - -:doc:`ExtendingLLVM` - Look here to see how to add instructions and intrinsics to LLVM. - -:doc:`LibFuzzer` - A library for writing in-process guided fuzzers. - -:doc:`FuzzingLLVM` - Information on writing and using Fuzzers to find bugs in LLVM. - -:doc:`ScudoHardenedAllocator` - A library that implements a security-hardened `malloc()`. - -:doc:`OptBisect` - A command line option for debugging optimization-induced failures. - -:doc:`GwpAsan` - A sampled heap memory error detection toolkit designed for production use. 
\ No newline at end of file diff --git a/llvm/docs/SubsystemDocumentation.rst b/llvm/docs/SubsystemDocumentation.rst deleted file mode 100644 index cac0ce85b1f5d..0000000000000 --- a/llvm/docs/SubsystemDocumentation.rst +++ /dev/null @@ -1,206 +0,0 @@ -.. _index-subsystem-docs: - -Subsystem Documentation -======================= - -For API clients and LLVM developers. - -.. toctree:: - :hidden: - - AliasAnalysis - MemorySSA - BitCodeFormat - BlockFrequencyTerminology - BranchWeightMetadata - Bugpoint - CodeGenerator - ExceptionHandling - AddingConstrainedIntrinsics - LinkTimeOptimization - SegmentedStacks - TableGenFundamentals - TableGen/index - DebuggingJITedCode - GoldPlugin - MarkedUpDisassembly - SystemLibrary - SupportLibrary - SourceLevelDebugging - Vectorizers - WritingAnLLVMBackend - GarbageCollection - WritingAnLLVMPass - HowToUseAttributes - NVPTXUsage - AMDGPUUsage - StackMaps - InAlloca - BigEndianNEON - CoverageMappingFormat - Statepoints - MergeFunctions - TypeMetadata - TransformMetadata - FaultMaps - Coroutines - GlobalISel - XRay - XRayExample - XRayFDRFormat - PDB/index - CFIVerify - SpeculativeLoadHardening - StackSafetyAnalysis - LoopTerminology - DependenceGraphs/index - -:doc:`WritingAnLLVMPass` - Information on how to write LLVM transformations and analyses. - -:doc:`WritingAnLLVMBackend` - Information on how to write LLVM backends for machine targets. - -:doc:`CodeGenerator` - The design and implementation of the LLVM code generator. Useful if you are - working on retargetting LLVM to a new architecture, designing a new codegen - pass, or enhancing existing components. - -:doc:`TableGen ` - Describes the TableGen tool, which is used heavily by the LLVM code - generator. - -:doc:`AliasAnalysis` - Information on how to write a new alias analysis implementation or how to - use existing analyses. - -:doc:`MemorySSA` - Information about the MemorySSA utility in LLVM, as well as how to use it. 
- -:doc:`GarbageCollection` - The interfaces source-language compilers should use for compiling GC'd - programs. - -:doc:`Source Level Debugging with LLVM ` - This document describes the design and philosophy behind the LLVM - source-level debugger. - -:doc:`Vectorizers` - This document describes the current status of vectorization in LLVM. - -:doc:`ExceptionHandling` - This document describes the design and implementation of exception handling - in LLVM. - -:doc:`AddingConstrainedIntrinsics` - Gives the steps necessary when adding a new constrained math intrinsic - to LLVM. - -:doc:`Bugpoint` - Automatic bug finder and test-case reducer description and usage - information. - -:doc:`BitCodeFormat` - This describes the file format and encoding used for LLVM "bc" files. - -:doc:`Support Library ` - This document describes the LLVM Support Library (``lib/Support``) and - how to keep LLVM source code portable - -:doc:`LinkTimeOptimization` - This document describes the interface between LLVM intermodular optimizer - and the linker and its design - -:doc:`GoldPlugin` - How to build your programs with link-time optimization on Linux. - -:doc:`DebuggingJITedCode` - How to debug JITed code with GDB. - -:doc:`MCJITDesignAndImplementation` - Describes the inner workings of MCJIT execution engine. - -:doc:`ORCv2` - Describes the design and implementation of the ORC APIs, including some - usage examples, and a guide for users transitioning from ORCv1 to ORCv2. - -:doc:`BranchWeightMetadata` - Provides information about Branch Prediction Information. - -:doc:`BlockFrequencyTerminology` - Provides information about terminology used in the ``BlockFrequencyInfo`` - analysis pass. - -:doc:`SegmentedStacks` - This document describes segmented stacks and how they are used in LLVM. - -:doc:`MarkedUpDisassembly` - This document describes the optional rich disassembly output syntax. - -:doc:`HowToUseAttributes` - Answers some questions about the new Attributes infrastructure. 
- -:doc:`NVPTXUsage` - This document describes using the NVPTX backend to compile GPU kernels. - -:doc:`AMDGPUUsage` - This document describes using the AMDGPU backend to compile GPU kernels. - -:doc:`StackMaps` - LLVM support for mapping instruction addresses to the location of - values and allowing code to be patched. - -:doc:`BigEndianNEON` - LLVM's support for generating NEON instructions on big endian ARM targets is - somewhat nonintuitive. This document explains the implementation and rationale. - -:doc:`CoverageMappingFormat` - This describes the format and encoding used for LLVM’s code coverage mapping. - -:doc:`Statepoints` - This describes a set of experimental extensions for garbage - collection support. - -:doc:`MergeFunctions` - Describes functions merging optimization. - -:doc:`InAlloca` - Description of the ``inalloca`` argument attribute. - -:doc:`FaultMaps` - LLVM support for folding control flow into faulting machine instructions. - -:doc:`CompileCudaWithLLVM` - LLVM support for CUDA. - -:doc:`Coroutines` - LLVM support for coroutines. - -:doc:`GlobalISel` - This describes the prototype instruction selection replacement, GlobalISel. - -:doc:`XRay` - High-level documentation of how to use XRay in LLVM. - -:doc:`XRayExample` - An example of how to debug an application with XRay. - -:doc:`The Microsoft PDB File Format ` - A detailed description of the Microsoft PDB (Program Database) file format. - -:doc:`CFIVerify` - A description of the verification tool for Control Flow Integrity. - -:doc:`SpeculativeLoadHardening` - A description of the Speculative Load Hardening mitigation for Spectre v1. - -:doc:`StackSafetyAnalysis` - This document describes the design of the stack safety analysis of local - variables. - -:doc:`LoopTerminology` - A document describing Loops and associated terms as used in LLVM. - -:doc:`Dependence Graphs ` - A description of the design of the various dependence graphs such as - the DDG (Data Dependence Graph). 
From 534c86d172528d791738c0503948ee9bf883a9f3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Oct 2019 14:10:55 +0000 Subject: [PATCH 243/254] [libc++] Use PRIVATE to link benchmark dependencies It's better style to use PRIVATE when linking libraries to executables, and it doesn't make a difference since executables don't need to propagate their link-time dependencies anyway. llvm-svn: 374050 --- libcxx/benchmarks/CMakeLists.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 56fadff1f9345..efe7914985382 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -135,16 +135,16 @@ function(add_benchmark_test name source_file) add_dependencies(${libcxx_target} cxx cxx-headers google-benchmark-libcxx) add_dependencies(cxx-benchmarks ${libcxx_target}) if (LIBCXX_ENABLE_SHARED) - target_link_libraries(${libcxx_target} cxx_shared) + target_link_libraries(${libcxx_target} PRIVATE cxx_shared) else() - target_link_libraries(${libcxx_target} cxx_static) + target_link_libraries(${libcxx_target} PRIVATE cxx_static) endif() if (TARGET cxx_experimental) - target_link_libraries(${libcxx_target} cxx_experimental) + target_link_libraries(${libcxx_target} PRIVATE cxx_experimental) endif() - target_link_libraries(${libcxx_target} -lbenchmark) + target_link_libraries(${libcxx_target} PRIVATE -lbenchmark) if (LLVM_USE_SANITIZER) - target_link_libraries(${libcxx_target} -ldl) + target_link_libraries(${libcxx_target} PRIVATE -ldl) endif() set_target_properties(${libcxx_target} PROPERTIES @@ -161,14 +161,14 @@ function(add_benchmark_test name source_file) add_executable(${native_target} EXCLUDE_FROM_ALL ${source_file}) add_dependencies(${native_target} google-benchmark-native google-benchmark-libcxx) - target_link_libraries(${native_target} -lbenchmark) + target_link_libraries(${native_target} PRIVATE -lbenchmark) if 
(LIBCXX_BENCHMARK_NATIVE_STDLIB STREQUAL "libstdc++") - target_link_libraries(${native_target} ${LIBSTDCXX_FILESYSTEM_LIB}) + target_link_libraries(${native_target} PRIVATE ${LIBSTDCXX_FILESYSTEM_LIB}) elseif (LIBCXX_BENCHMARK_NATIVE_STDLIB STREQUAL "libc++") - target_link_libraries(${native_target} -lc++fs -lc++experimental) + target_link_libraries(${native_target} PRIVATE -lc++fs -lc++experimental) endif() if (LIBCXX_HAS_PTHREAD_LIB) - target_link_libraries(${native_target} -pthread) + target_link_libraries(${native_target} PRIVATE -pthread) endif() add_dependencies(cxx-benchmarks ${native_target}) set_target_properties(${native_target} From 6e0b1ce48e3cf95f447b932b03d8294903aa56e0 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 8 Oct 2019 14:15:32 +0000 Subject: [PATCH 244/254] Object/minidump: Add support for the MemoryInfoList stream Summary: This patch adds the definitions of the constants and structures necessary to interpret the MemoryInfoList minidump stream, as well as the object::MinidumpFile interface to access the stream. While the code is fairly simple, there is one important deviation from the other minidump streams, which is worth calling out explicitly. Unlike other "List" streams, the size of the records inside MemoryInfoList stream is not known statically. Instead it is described in the stream header. This makes it impossible to return ArrayRef from the accessor method, as it is done with other streams. Instead, I create an iterator class, which can be parameterized by the runtime size of the structure, and return iterator_range instead. 
Reviewers: amccarth, jhenderson, clayborg Subscribers: JosephTremoulet, zturner, markmentovai, lldb-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68210 llvm-svn: 374051 --- llvm/include/llvm/BinaryFormat/Minidump.h | 37 ++++ .../llvm/BinaryFormat/MinidumpConstants.def | 41 +++- llvm/include/llvm/Object/Minidump.h | 68 ++++-- llvm/lib/Object/Minidump.cpp | 46 ++-- llvm/unittests/Object/MinidumpTest.cpp | 199 ++++++++++++++++++ 5 files changed, 362 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/Minidump.h b/llvm/include/llvm/BinaryFormat/Minidump.h index 65c17d1eb00c1..93df467cd823f 100644 --- a/llvm/include/llvm/BinaryFormat/Minidump.h +++ b/llvm/include/llvm/BinaryFormat/Minidump.h @@ -18,6 +18,7 @@ #ifndef LLVM_BINARYFORMAT_MINIDUMP_H #define LLVM_BINARYFORMAT_MINIDUMP_H +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" @@ -67,6 +68,42 @@ struct MemoryDescriptor { }; static_assert(sizeof(MemoryDescriptor) == 16, ""); +struct MemoryInfoListHeader { + support::ulittle32_t SizeOfHeader; + support::ulittle32_t SizeOfEntry; + support::ulittle64_t NumberOfEntries; +}; +static_assert(sizeof(MemoryInfoListHeader) == 16, ""); + +enum class MemoryProtection : uint32_t { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +enum class MemoryState : uint32_t { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" +}; + +enum class MemoryType : uint32_t { +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" +}; + +struct MemoryInfo { + support::ulittle64_t BaseAddress; + support::ulittle64_t AllocationBase; + support::little_t AllocationProtect; + support::ulittle32_t Reserved0; + support::ulittle64_t 
RegionSize; + support::little_t State; + support::little_t Protect; + support::little_t Type; + support::ulittle32_t Reserved1; +}; +static_assert(sizeof(MemoryInfo) == 48, ""); + /// Specifies the location and type of a single stream in the minidump file. The /// minidump stream directory is an array of entries of this type, with its size /// given by Header.NumberOfStreams. diff --git a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def index d4f13dd992179..aeef399af7a43 100644 --- a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def +++ b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH || \ - defined HANDLE_MDMP_PLATFORM) +#if !(defined(HANDLE_MDMP_STREAM_TYPE) || defined(HANDLE_MDMP_ARCH) || \ + defined(HANDLE_MDMP_PLATFORM) || defined(HANDLE_MDMP_PROTECT) || \ + defined(HANDLE_MDMP_MEMSTATE) || defined(HANDLE_MDMP_MEMTYPE)) #error "Missing HANDLE_MDMP definition" #endif @@ -23,6 +24,18 @@ #define HANDLE_MDMP_PLATFORM(CODE, NAME) #endif +#ifndef HANDLE_MDMP_PROTECT +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMSTATE +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMTYPE +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) +#endif + HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList) HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList) HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList) @@ -102,6 +115,30 @@ HANDLE_MDMP_PLATFORM(0x8203, Android) // Android HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3 HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl) +HANDLE_MDMP_PROTECT(0x01, NoAccess, PAGE_NO_ACCESS) +HANDLE_MDMP_PROTECT(0x02, ReadOnly, PAGE_READ_ONLY) +HANDLE_MDMP_PROTECT(0x04, ReadWrite, PAGE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x08, WriteCopy, PAGE_WRITE_COPY) 
+HANDLE_MDMP_PROTECT(0x10, Execute, PAGE_EXECUTE) +HANDLE_MDMP_PROTECT(0x20, ExecuteRead, PAGE_EXECUTE_READ) +HANDLE_MDMP_PROTECT(0x40, ExecuteReadWrite, PAGE_EXECUTE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x80, ExeciteWriteCopy, PAGE_EXECUTE_WRITE_COPY) +HANDLE_MDMP_PROTECT(0x100, Guard, PAGE_GUARD) +HANDLE_MDMP_PROTECT(0x200, NoCache, PAGE_NOCACHE) +HANDLE_MDMP_PROTECT(0x400, WriteCombine, PAGE_WRITECOMBINE) +HANDLE_MDMP_PROTECT(0x40000000, TargetsInvalid, PAGE_TARGETS_INVALID) + +HANDLE_MDMP_MEMSTATE(0x01000, Commit, MEM_COMMIT) +HANDLE_MDMP_MEMSTATE(0x02000, Reserve, MEM_RESERVE) +HANDLE_MDMP_MEMSTATE(0x10000, Free, MEM_FREE) + +HANDLE_MDMP_MEMTYPE(0x0020000, Private, MEM_PRIVATE) +HANDLE_MDMP_MEMTYPE(0x0040000, Mapped, MEM_MAPPED) +HANDLE_MDMP_MEMTYPE(0x1000000, Image, MEM_IMAGE) + #undef HANDLE_MDMP_STREAM_TYPE #undef HANDLE_MDMP_ARCH #undef HANDLE_MDMP_PLATFORM +#undef HANDLE_MDMP_PROTECT +#undef HANDLE_MDMP_MEMSTATE +#undef HANDLE_MDMP_MEMTYPE diff --git a/llvm/include/llvm/Object/Minidump.h b/llvm/include/llvm/Object/Minidump.h index 470008d552e73..92cab92bfca83 100644 --- a/llvm/include/llvm/Object/Minidump.h +++ b/llvm/include/llvm/Object/Minidump.h @@ -11,6 +11,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator.h" #include "llvm/BinaryFormat/Minidump.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Error.h" @@ -80,16 +81,56 @@ class MinidumpFile : public Binary { return getListStream(minidump::StreamType::ThreadList); } - /// Returns the list of memory ranges embedded in the MemoryList stream. An - /// error is returned if the file does not contain this stream, or if the - /// stream is not large enough to contain the number of memory descriptors - /// declared in the stream header. The consistency of the MemoryDescriptor - /// entries themselves is not checked in any way. + /// Returns the list of descriptors embedded in the MemoryList stream. 
The + /// descriptors provide the content of interesting regions of memory at the + /// time the minidump was taken. An error is returned if the file does not + /// contain this stream, or if the stream is not large enough to contain the + /// number of memory descriptors declared in the stream header. The + /// consistency of the MemoryDescriptor entries themselves is not checked in + /// any way. Expected> getMemoryList() const { return getListStream( minidump::StreamType::MemoryList); } + class MemoryInfoIterator + : public iterator_facade_base { + public: + MemoryInfoIterator(ArrayRef Storage, size_t Stride) + : Storage(Storage), Stride(Stride) { + assert(Storage.size() % Stride == 0); + } + + bool operator==(const MemoryInfoIterator &R) const { + return Storage.size() == R.Storage.size(); + } + + const minidump::MemoryInfo &operator*() const { + assert(Storage.size() >= sizeof(minidump::MemoryInfo)); + return *reinterpret_cast(Storage.data()); + } + + MemoryInfoIterator &operator++() { + Storage = Storage.drop_front(Stride); + return *this; + } + + private: + ArrayRef Storage; + size_t Stride; + }; + + /// Returns the list of descriptors embedded in the MemoryInfoList stream. The + /// descriptors provide properties (e.g. permissions) of interesting regions + /// of memory at the time the minidump was taken. An error is returned if the + /// file does not contain this stream, or if the stream is not large enough to + /// contain the number of memory descriptors declared in the stream header. + /// The consistency of the MemoryInfoList entries themselves is not checked + /// in any way. 
+ Expected> getMemoryInfoList() const; + private: static Error createError(StringRef Str) { return make_error(Str, object_error::parse_failed); @@ -137,10 +178,10 @@ class MinidumpFile : public Binary { }; template -Expected MinidumpFile::getStream(minidump::StreamType Stream) const { - if (auto OptionalStream = getRawStream(Stream)) { - if (OptionalStream->size() >= sizeof(T)) - return *reinterpret_cast(OptionalStream->data()); +Expected MinidumpFile::getStream(minidump::StreamType Type) const { + if (Optional> Stream = getRawStream(Type)) { + if (Stream->size() >= sizeof(T)) + return *reinterpret_cast(Stream->data()); return createEOFError(); } return createError("No such stream"); @@ -153,10 +194,11 @@ Expected> MinidumpFile::getDataSliceAs(ArrayRef Data, // Check for overflow. if (Count > std::numeric_limits::max() / sizeof(T)) return createEOFError(); - auto ExpectedArray = getDataSlice(Data, Offset, sizeof(T) * Count); - if (!ExpectedArray) - return ExpectedArray.takeError(); - return ArrayRef(reinterpret_cast(ExpectedArray->data()), Count); + Expected> Slice = + getDataSlice(Data, Offset, sizeof(T) * Count); + if (!Slice) + return Slice.takeError(); + return ArrayRef(reinterpret_cast(Slice->data()), Count); } } // end namespace object diff --git a/llvm/lib/Object/Minidump.cpp b/llvm/lib/Object/Minidump.cpp index 7b5b215586990..3e932fe7be28b 100644 --- a/llvm/lib/Object/Minidump.cpp +++ b/llvm/lib/Object/Minidump.cpp @@ -53,13 +53,30 @@ Expected MinidumpFile::getString(size_t Offset) const { return Result; } +Expected> +MinidumpFile::getMemoryInfoList() const { + Optional> Stream = getRawStream(StreamType::MemoryInfoList); + if (!Stream) + return createError("No such stream"); + auto ExpectedHeader = + getDataSliceAs(*Stream, 0, 1); + if (!ExpectedHeader) + return ExpectedHeader.takeError(); + const minidump::MemoryInfoListHeader &H = ExpectedHeader.get()[0]; + Expected> Data = + getDataSlice(*Stream, H.SizeOfHeader, H.SizeOfEntry * H.NumberOfEntries); + if 
(!Data) + return Data.takeError(); + return make_range(MemoryInfoIterator(*Data, H.SizeOfEntry), + MemoryInfoIterator({}, H.SizeOfEntry)); +} + template -Expected> MinidumpFile::getListStream(StreamType Stream) const { - auto OptionalStream = getRawStream(Stream); - if (!OptionalStream) +Expected> MinidumpFile::getListStream(StreamType Type) const { + Optional> Stream = getRawStream(Type); + if (!Stream) return createError("No such stream"); - auto ExpectedSize = - getDataSliceAs(*OptionalStream, 0, 1); + auto ExpectedSize = getDataSliceAs(*Stream, 0, 1); if (!ExpectedSize) return ExpectedSize.takeError(); @@ -69,10 +86,10 @@ Expected> MinidumpFile::getListStream(StreamType Stream) const { // Some producers insert additional padding bytes to align the list to an // 8-byte boundary. Check for that by comparing the list size with the overall // stream size. - if (ListOffset + sizeof(T) * ListSize < OptionalStream->size()) + if (ListOffset + sizeof(T) * ListSize < Stream->size()) ListOffset = 8; - return getDataSliceAs(*OptionalStream, ListOffset, ListSize); + return getDataSliceAs(*Stream, ListOffset, ListSize); } template Expected> MinidumpFile::getListStream(StreamType) const; @@ -109,13 +126,14 @@ MinidumpFile::create(MemoryBufferRef Source) { return ExpectedStreams.takeError(); DenseMap StreamMap; - for (const auto &Stream : llvm::enumerate(*ExpectedStreams)) { - StreamType Type = Stream.value().Type; - const LocationDescriptor &Loc = Stream.value().Location; + for (const auto &StreamDescriptor : llvm::enumerate(*ExpectedStreams)) { + StreamType Type = StreamDescriptor.value().Type; + const LocationDescriptor &Loc = StreamDescriptor.value().Location; - auto ExpectedStream = getDataSlice(Data, Loc.RVA, Loc.DataSize); - if (!ExpectedStream) - return ExpectedStream.takeError(); + Expected> Stream = + getDataSlice(Data, Loc.RVA, Loc.DataSize); + if (!Stream) + return Stream.takeError(); if (Type == StreamType::Unused && Loc.DataSize == 0) { // Ignore dummy streams. 
This is technically ill-formed, but a number of @@ -128,7 +146,7 @@ MinidumpFile::create(MemoryBufferRef Source) { return createError("Cannot handle one of the minidump streams"); // Update the directory map, checking for duplicate stream types. - if (!StreamMap.try_emplace(Type, Stream.index()).second) + if (!StreamMap.try_emplace(Type, StreamDescriptor.index()).second) return createError("Duplicate stream type"); } diff --git a/llvm/unittests/Object/MinidumpTest.cpp b/llvm/unittests/Object/MinidumpTest.cpp index ba9af5a824752..5abb5b77fa296 100644 --- a/llvm/unittests/Object/MinidumpTest.cpp +++ b/llvm/unittests/Object/MinidumpTest.cpp @@ -511,3 +511,202 @@ TEST(MinidumpFile, getMemoryList) { EXPECT_EQ(0x00090807u, MD.Memory.RVA); } } + +TEST(MinidumpFile, getMemoryInfoList) { + std::vector OneEntry{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 64, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 16, 0, 0, 0, 48, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, 0, // NumberOfEntries + // MemoryInfo + 0, 1, 2, 3, 4, 5, 6, 7, // BaseAddress + 8, 9, 0, 1, 2, 3, 4, 5, // AllocationBase + 16, 0, 0, 0, 6, 7, 8, 9, // AllocationProtect, Reserved0 + 0, 1, 2, 3, 4, 5, 6, 7, // RegionSize + 0, 16, 0, 0, 32, 0, 0, 0, // State, Protect + 0, 0, 2, 0, 8, 9, 0, 1, // Type, Reserved1 + }; + + // Same as before, but the list header is larger. 
+ std::vector BiggerHeader{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 68, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 20, 0, 0, 0, 48, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, 0, // NumberOfEntries + 0, 0, 0, 0, // ??? + // MemoryInfo + 0, 1, 2, 3, 4, 5, 6, 7, // BaseAddress + 8, 9, 0, 1, 2, 3, 4, 5, // AllocationBase + 16, 0, 0, 0, 6, 7, 8, 9, // AllocationProtect, Reserved0 + 0, 1, 2, 3, 4, 5, 6, 7, // RegionSize + 0, 16, 0, 0, 32, 0, 0, 0, // State, Protect + 0, 0, 2, 0, 8, 9, 0, 1, // Type, Reserved1 + }; + + // Same as before, but the entry is larger. + std::vector BiggerEntry{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 68, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 16, 0, 0, 0, 52, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, 0, // NumberOfEntries + // MemoryInfo + 0, 1, 2, 3, 4, 5, 6, 7, // BaseAddress + 8, 9, 0, 1, 2, 3, 4, 5, // AllocationBase + 16, 0, 0, 0, 6, 7, 8, 9, // AllocationProtect, Reserved0 + 0, 1, 2, 3, 4, 5, 6, 7, // RegionSize + 0, 16, 0, 0, 32, 0, 0, 0, // State, Protect + 0, 0, 2, 0, 8, 9, 0, 1, // Type, Reserved1 + 0, 0, 0, 0, // ??? 
+ }; + + for (ArrayRef Data : {OneEntry, BiggerHeader, BiggerEntry}) { + auto ExpectedFile = create(Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const MinidumpFile &File = **ExpectedFile; + auto ExpectedInfo = File.getMemoryInfoList(); + ASSERT_THAT_EXPECTED(ExpectedInfo, Succeeded()); + ASSERT_EQ(1u, std::distance(ExpectedInfo->begin(), ExpectedInfo->end())); + const MemoryInfo &Info = *ExpectedInfo.get().begin(); + EXPECT_EQ(0x0706050403020100u, Info.BaseAddress); + EXPECT_EQ(0x0504030201000908u, Info.AllocationBase); + EXPECT_EQ(MemoryProtection::Execute, Info.AllocationProtect); + EXPECT_EQ(0x09080706u, Info.Reserved0); + EXPECT_EQ(0x0706050403020100u, Info.RegionSize); + EXPECT_EQ(MemoryState::Commit, Info.State); + EXPECT_EQ(MemoryProtection::ExecuteRead, Info.Protect); + EXPECT_EQ(MemoryType::Private, Info.Type); + EXPECT_EQ(0x01000908u, Info.Reserved1); + } + + // Header does not fit into the stream. + std::vector HeaderTooBig{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 15, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 16, 0, 0, 0, 48, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, // ??? + }; + Expected> File = create(HeaderTooBig); + ASSERT_THAT_EXPECTED(File, Succeeded()); + EXPECT_THAT_EXPECTED(File.get()->getMemoryInfoList(), Failed()); + + // Header fits into the stream, but it is too small to contain the required + // entries. 
+ std::vector HeaderTooSmall{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 15, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 15, 0, 0, 0, 48, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, // ??? + }; + File = create(HeaderTooSmall); + ASSERT_THAT_EXPECTED(File, Succeeded()); + EXPECT_THAT_EXPECTED(File.get()->getMemoryInfoList(), Failed()); + + std::vector EntryTooBig{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 64, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 16, 0, 0, 0, 49, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 1, 0, 0, 0, 0, 0, 0, 0, // NumberOfEntries + // MemoryInfo + 0, 1, 2, 3, 4, 5, 6, 7, // BaseAddress + 8, 9, 0, 1, 2, 3, 4, 5, // AllocationBase + 16, 0, 0, 0, 6, 7, 8, 9, // AllocationProtect, Reserved0 + 0, 1, 2, 3, 4, 5, 6, 7, // RegionSize + 0, 16, 0, 0, 32, 0, 0, 0, // State, Protect + 0, 0, 2, 0, 8, 9, 0, 1, // Type, Reserved1 + }; + File = create(EntryTooBig); + ASSERT_THAT_EXPECTED(File, Succeeded()); + EXPECT_THAT_EXPECTED(File.get()->getMemoryInfoList(), Failed()); + + std::vector ThreeEntries{ + // Header + 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version + 1, 0, 0, 0, // NumberOfStreams, + 32, 0, 0, 0, // StreamDirectoryRVA + 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp + 0, 0, 0, 0, 0, 0, 0, 0, // Flags + // Stream Directory + 16, 0, 0, 0, 160, 0, 0, 0, // Type, DataSize, + 44, 0, 0, 0, // RVA + // MemoryInfoListHeader + 16, 0, 0, 0, 48, 0, 0, 0, // SizeOfHeader, SizeOfEntry + 3, 0, 0, 0, 0, 0, 0, 0, // 
NumberOfEntries + // MemoryInfo + 0, 1, 2, 3, 0, 0, 0, 0, // BaseAddress + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationBase + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationProtect, Reserved0 + 0, 0, 0, 0, 0, 0, 0, 0, // RegionSize + 0, 0, 0, 0, 0, 0, 0, 0, // State, Protect + 0, 0, 0, 0, 0, 0, 0, 0, // Type, Reserved1 + 0, 0, 4, 5, 6, 7, 0, 0, // BaseAddress + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationBase + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationProtect, Reserved0 + 0, 0, 0, 0, 0, 0, 0, 0, // RegionSize + 0, 0, 0, 0, 0, 0, 0, 0, // State, Protect + 0, 0, 0, 0, 0, 0, 0, 0, // Type, Reserved1 + 0, 0, 0, 8, 9, 0, 1, 0, // BaseAddress + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationBase + 0, 0, 0, 0, 0, 0, 0, 0, // AllocationProtect, Reserved0 + 0, 0, 0, 0, 0, 0, 0, 0, // RegionSize + 0, 0, 0, 0, 0, 0, 0, 0, // State, Protect + 0, 0, 0, 0, 0, 0, 0, 0, // Type, Reserved1 + }; + File = create(ThreeEntries); + ASSERT_THAT_EXPECTED(File, Succeeded()); + auto ExpectedInfo = File.get()->getMemoryInfoList(); + ASSERT_THAT_EXPECTED(ExpectedInfo, Succeeded()); + EXPECT_THAT(to_vector<3>(map_range(*ExpectedInfo, + [](const MemoryInfo &Info) -> uint64_t { + return Info.BaseAddress; + })), + testing::ElementsAre(0x0000000003020100u, 0x0000070605040000u, + 0x0001000908000000u)); +} From aca5d395d541b4adb9a9808984da24044e0b4ac7 Mon Sep 17 00:00:00 2001 From: Sid Manning Date: Tue, 8 Oct 2019 14:23:49 +0000 Subject: [PATCH 245/254] [lld][Hexagon] Support PLT relocation R_HEX_B15_PCREL_X/R_HEX_B9_PCREL_X These are sometimes generated by tail call optimizations. 
Differential Revision: https://reviews.llvm.org/D66542 llvm-svn: 374052 --- lld/ELF/Arch/Hexagon.cpp | 4 +- lld/test/ELF/hexagon-plt.s | 102 ++++++++++++++++++++++++++++++++++ lld/test/ELF/hexagon-shared.s | 76 +++++++++++++++++-------- 3 files changed, 157 insertions(+), 25 deletions(-) create mode 100644 lld/test/ELF/hexagon-plt.s diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 033500337ec2f..e716040d253e7 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -103,13 +103,13 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_LO16: return R_ABS; case R_HEX_B9_PCREL: - case R_HEX_B9_PCREL_X: case R_HEX_B13_PCREL: case R_HEX_B15_PCREL: - case R_HEX_B15_PCREL_X: case R_HEX_6_PCREL_X: case R_HEX_32_PCREL: return R_PC; + case R_HEX_B9_PCREL_X: + case R_HEX_B15_PCREL_X: case R_HEX_B22_PCREL: case R_HEX_PLT_B22_PCREL: case R_HEX_B22_PCREL_X: diff --git a/lld/test/ELF/hexagon-plt.s b/lld/test/ELF/hexagon-plt.s new file mode 100644 index 0000000000000..e390880a2030b --- /dev/null +++ b/lld/test/ELF/hexagon-plt.s @@ -0,0 +1,102 @@ +# REQUIRES: hexagon +# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s + +# RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %t1.s -o %t1.o +# RUN: ld.lld -shared %t1.o -soname=t1.so -o %t1.so +# RUN: llvm-mc -mno-fixup -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o +# RUN: ld.lld %t.o %t1.so -z separate-code -o %t +# RUN: llvm-readelf -S -s %t | FileCheck --check-prefixes=SEC,NM %s +# RUN: llvm-readobj -r %t | FileCheck --check-prefix=RELOC %s +# RUN: llvm-readelf -x .got.plt %t | FileCheck --check-prefix=GOTPLT %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=DIS %s + +# SEC: .plt PROGBITS {{0*}}00020040 + +## A canonical PLT has a non-zero st_value. bar and weak are called but their +## addresses are not taken, so a canonical PLT is not necessary. 
+# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar +# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak + +## The .got.plt slots relocated by .rela.plt point to .plt +## This is required by glibc. +# RELOC: .rela.plt { +# RELOC-NEXT: 0x40078 R_HEX_JMP_SLOT bar 0x0 +# RELOC-NEXT: 0x4007C R_HEX_JMP_SLOT weak 0x0 +# RELOC-NEXT: } +# GOTPLT: section '.got.plt' +# GOTPLT-NEXT: 0x00040068 00000000 00000000 00000000 00000000 +# GOTPLT-NEXT: 0x00040078 00000000 00000000 + +# DIS: _start: +## Direct call +## Call foo directly +# DIS-NEXT: { call 0x2003c } +## Call bar via plt +# DIS-NEXT: { call 0x20060 } +## Call weak via plt +# DIS-NEXT: { call 0x20070 } +# DIS-NEXT: { immext(#0) + +## Call foo directly +# DIS-NEXT: if (p0) jump:nt 0x2003c } +# DIS-NEXT: { immext(#64) +## Call bar via plt +# DIS-NEXT: if (p0) jump:nt 0x20060 } +# DIS-NEXT: { immext(#64) +## Call weak via plt +# DIS-NEXT: if (p0) jump:nt 0x20070 } +# DIS-NEXT: { immext(#0) + +## Call foo directly +# DIS-NEXT: r0 = #0 ; jump 0x2003c } +# DIS-NEXT: { immext(#0) +## Call bar via plt +# DIS-NEXT: r0 = #0 ; jump 0x20060 } +# DIS-NEXT: { immext(#0) +## Call weak via plt +# DIS-NEXT: r0 = #0 ; jump 0x20070 } + +# DIS: foo: +# DIS-NEXT: 2003c: + + +# DIS: Disassembly of section .plt: + +# DIS: 00020040 .plt: +# DIS-NEXT: 20040: { immext(#131072) +# DIS-NEXT: 20044: r28 = add(pc,##131112) } +# DIS-NEXT: 20048: { r14 -= add(r28,#16) +# DIS-NEXT: 2004c: r15 = memw(r28+#8) +# DIS-NEXT: 20050: r28 = memw(r28+#4) } +# DIS-NEXT: 20054: { r14 = asr(r14,#2) +# DIS-NEXT: 20058: jumpr r28 } +# DIS-NEXT: 2005c: { trap0(#219) } +## bar's plt slot +# DIS-NEXT: 20060: { immext(#131072) +# DIS-NEXT: 20064: r14 = add(pc,##131096) } +# DIS-NEXT: 20068: { r28 = memw(r14+#0) } +# DIS-NEXT: 2006c: { jumpr r28 } +## weak's plt slot +# DIS-NEXT: 20070: { immext(#131072) +# DIS-NEXT: 20074: r14 = add(pc,##131084) } +# DIS-NEXT: 20078: { r28 = memw(r14+#0) } +# DIS-NEXT: 2007c: { jumpr r28 } + + +.global _start, foo, bar +.weak weak + 
+_start: + call foo + call bar + call weak + if (p0) jump foo + if (p0) jump bar + if (p0) jump weak + { r0 = #0; jump foo } + { r0 = #0; jump bar } + { r0 = #0; jump weak } + +## foo is local and non-preemptale, no PLT is generated. +foo: + jumpr r31 diff --git a/lld/test/ELF/hexagon-shared.s b/lld/test/ELF/hexagon-shared.s index 4ec38d5c369c8..6bc1bd567ed5d 100644 --- a/lld/test/ELF/hexagon-shared.s +++ b/lld/test/ELF/hexagon-shared.s @@ -1,15 +1,23 @@ # REQUIRES: hexagon -# RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t +# RUN: llvm-mc -mno-fixup -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %S/Inputs/hexagon-shared.s -o %t2.o # RUN: ld.lld -shared %t2.o -soname so -o %t3.so -# RUN: ld.lld -shared %t %t3.so -o %t4.so +# RUN: ld.lld -shared %t.o %t3.so -o %t4.so +# RUN: ld.lld -Bsymbolic -shared %t.o %t3.so -o %t5.so # RUN: llvm-objdump -d -j .plt %t4.so | FileCheck --check-prefix=PLT %s # RUN: llvm-objdump -d -j .text %t4.so | FileCheck --check-prefix=TEXT %s # RUN: llvm-objdump -D -j .got %t4.so | FileCheck --check-prefix=GOT %s # RUN: llvm-readelf -r %t4.so | FileCheck --check-prefix=RELO %s +# RUN: llvm-readelf -r %t5.so | FileCheck --check-prefix=SYMBOLIC %s -.global foo -foo: +.global _start, foo, hidden_symbol +.hidden hidden_symbol +_start: +# When -Bsymbolic is specified calls to locally resolvables should +# not generate a plt. +call ##foo +# Calls to hidden_symbols should not trigger a plt. +call ##hidden_symbol # _HEX_32_PCREL .word _DYNAMIC - . 
@@ -17,6 +25,10 @@ call ##bar # R_HEX_PLT_B22_PCREL call bar@PLT +# R_HEX_B15_PCREL_X +if (p0) jump bar +# R_HEX_B9_PCREL_X +{ r0 = #0; jump bar } # R_HEX_GOT_11_X and R_HEX_GOT_32_6_X r2=add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) @@ -26,6 +38,13 @@ jumpr r0 # R_HEX_GOT_16_X r0 = add(r1,##bar@GOT) +# foo is local so no plt will be generated +foo: + jumpr lr + +hidden_symbol: + jumpr lr + # R_HEX_32 .data .global var @@ -40,26 +59,37 @@ pvar: .word var .size pvar, 4 -# PLT: { immext(#131200 -# PLT: r28 = add(pc,##131252) } -# PLT: { r14 -= add(r28,#16) -# PLT: r15 = memw(r28+#8) -# PLT: r28 = memw(r28+#4) } -# PLT: { r14 = asr(r14,#2) -# PLT: jumpr r28 } -# PLT: { trap0(#219) } -# PLT: immext(#131200) -# PLT: r14 = add(pc,##131236) } -# PLT: r28 = memw(r14+#0) } -# PLT: jumpr r28 } -# TEXT: 10218: 68 00 01 00 00010068 -# TEXT: { call 0x10270 } -# TEXT: r0 = add(r1,##-65548) } +# PLT: { immext(#131264 +# PLT-NEXT: r28 = add(pc,##131268) } +# PLT-NEXT: { r14 -= add(r28,#16) +# PLT-NEXT: r15 = memw(r28+#8) +# PLT-NEXT: r28 = memw(r28+#4) } +# PLT-NEXT: { r14 = asr(r14,#2) +# PLT-NEXT: jumpr r28 } +# PLT-NEXT: { trap0(#219) } +# PLT-NEXT: immext(#131200) +# PLT-NEXT: r14 = add(pc,##131252) } +# PLT-NEXT: r28 = memw(r14+#0) } +# PLT-NEXT: jumpr r28 } + +# TEXT: 8c 00 01 00 0001008c +# TEXT: { call 0x102d0 } +# TEXT: if (p0) jump:nt 0x102d0 +# TEXT: r0 = #0 ; jump 0x102d0 +# TEXT: r0 = add(r1,##-65548) # GOT: .got: -# GOT: 202f8: 00 00 00 00 00000000 +# GOT: 00 00 00 00 00000000 + +# RELO: R_HEX_GLOB_DAT +# RELO: R_HEX_32 +# RELO: Relocation section '.rela.plt' at offset 0x22c contains 2 entries: +# RELO: R_HEX_JMP_SLOT {{.*}} foo +# RELO-NEXT: R_HEX_JMP_SLOT {{.*}} bar +# RELO-NOT: R_HEX_JMP_SLOT {{.*}} hidden -# RELO: 000202f8 00000121 R_HEX_GLOB_DAT -# RELO: 00030300 00000406 R_HEX_32 -# RELO: 00030314 00000122 R_HEX_JMP_SLOT +# Make sure that no PLT is generated for a local call. 
+# SYMBOLIC: Relocation section '.rela.plt' at offset 0x22c contains 1 entries: +# SYMBOLIC: R_HEX_JMP_SLOT {{.*}} bar +# SYMBOLIC-NOT: R_HEX_JMP_SLOT {{.*}} foo From cf3ab6d96c3e9477b05bbbe8b525151320e96d71 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Oct 2019 14:28:56 +0000 Subject: [PATCH 246/254] [libc++] Add missing link-time dependencies to the benchmarks Since the benchmarks build with -nostdlib, they need to manually link against some system libraries that are used by the benchmarks and the GoogleBenchmark library itself. Previously, we'd rely on the fact that these libraries were linked through the PUBLIC dependencies of cxx_shared/cxx_static. However, if we were to make these dependencies PRIVATE (as they should be because they are implementation details of libc++), the benchmarks would fail to link. This commit remediates that. llvm-svn: 374053 --- libcxx/benchmarks/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index efe7914985382..a5b801c365482 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -146,6 +146,15 @@ function(add_benchmark_test name source_file) if (LLVM_USE_SANITIZER) target_link_libraries(${libcxx_target} PRIVATE -ldl) endif() + if (LIBCXX_HAS_C_LIB) + target_link_libraries(${libcxx_target} PRIVATE -lc) + endif() + if (LIBCXX_HAS_M_LIB) + target_link_libraries(${libcxx_target} PRIVATE -lm) + endif() + if (LIBCXX_HAS_PTHREAD_LIB) + target_link_libraries(${libcxx_target} PRIVATE -lpthread) + endif() set_target_properties(${libcxx_target} PROPERTIES OUTPUT_NAME "${name}.libcxx.out" From 2cd0f2895946de546d066f67c88ff365d3210017 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Tue, 8 Oct 2019 14:30:24 +0000 Subject: [PATCH 247/254] [llvm-exegesis] Add options to SnippetGenerator. Summary: This adds a `-max-configs-per-opcode` option to limit the number of configs per opcode. 
Reviewers: gchatelet Subscribers: tschuett, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68642 llvm-svn: 374054 --- llvm/docs/CommandGuide/llvm-exegesis.rst | 14 ++++++++++- .../tools/llvm-exegesis/X86/max-configs.test | 24 +++++++++++++++++++ llvm/tools/llvm-exegesis/lib/Latency.h | 2 +- .../llvm-exegesis/lib/SnippetGenerator.cpp | 6 ++++- .../llvm-exegesis/lib/SnippetGenerator.h | 7 +++++- llvm/tools/llvm-exegesis/lib/Target.cpp | 22 ++++++++--------- llvm/tools/llvm-exegesis/lib/Target.h | 7 +++--- llvm/tools/llvm-exegesis/lib/Uops.h | 2 +- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 14 ++++++----- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 11 ++++++++- .../X86/SnippetGeneratorTest.cpp | 5 ++-- 11 files changed, 86 insertions(+), 28 deletions(-) create mode 100644 llvm/test/tools/llvm-exegesis/X86/max-configs.test diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index 4ac1c76635479..81e92e7736d68 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -195,11 +195,23 @@ OPTIONS to specify at least one of the `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`. -.. option:: -num-repetitions= +.. option:: -num-repetitions= Specify the number of repetitions of the asm snippet. Higher values lead to more accurate measurements but lengthen the benchmark. +.. option:: -max-configs-per-opcode= + + Specify the maximum configurations that can be generated for each opcode. + By default this is `1`, meaning that we assume that a single measurement is + enough to characterize an opcode. This might not be true of all instructions: + for example, the performance characteristics of the LEA instruction on X86 + depends on the value of assigned registers and immediates. 
Setting a value of + `-max-configs-per-opcode` larger than `1` allows `llvm-exegesis` to explore + more configurations to discover if some register or immediate assignments + lead to different performance characteristics. + + .. option:: -benchmarks-file= File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput` diff --git a/llvm/test/tools/llvm-exegesis/X86/max-configs.test b/llvm/test/tools/llvm-exegesis/X86/max-configs.test new file mode 100644 index 0000000000000..a980e85d4b5ae --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/max-configs.test @@ -0,0 +1,24 @@ +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s +# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: SBB8rr +CHECK-NEXT: config: '' +CHECK-NEXT: register_initial_values: +CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' +CHECK-LAST: ... + +CHECK1-NOT: SBB8rr + +CHECK2: --- +CHECK2-NEXT: mode: latency +CHECK2-NEXT: key: +CHECK2-NEXT: instructions: +CHECK2-NEXT: SBB8rr +CHECK2-NEXT: config: '' +CHECK2-NEXT: register_initial_values: +CHECK2-DAG: - '[[REG1:[A-Z0-9]+]]=0x0' +CHECK2-LAST: ... 
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/Latency.h index 503f91daf8958..0ad0c2cea9b54 100644 --- a/llvm/tools/llvm-exegesis/lib/Latency.h +++ b/llvm/tools/llvm-exegesis/lib/Latency.h @@ -24,7 +24,7 @@ namespace exegesis { class LatencySnippetGenerator : public SnippetGenerator { public: - LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + using SnippetGenerator::SnippetGenerator; ~LatencySnippetGenerator() override; llvm::Expected> diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp index 879962001e6bb..1b16259b8e66a 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -33,7 +33,8 @@ std::vector getSingleton(CodeTemplate &&CT) { SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S) : llvm::StringError(S, llvm::inconvertibleErrorCode()) {} -SnippetGenerator::SnippetGenerator(const LLVMState &State) : State(State) {} +SnippetGenerator::SnippetGenerator(const LLVMState &State, const Options &Opts) + : State(State), Opts(Opts) {} SnippetGenerator::~SnippetGenerator() = default; @@ -81,6 +82,9 @@ SnippetGenerator::generateConfigurations( computeRegisterInitialValues(CT.Instructions); BC.Key.Config = CT.Config; Output.push_back(std::move(BC)); + if (Output.size() >= Opts.MaxConfigsPerOpcode) + return Output; // Early exit if we exceeded the number of allowed + // configs. } } return Output; diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h index c2ea1c124738e..8e8cd6fcb9952 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -51,7 +51,11 @@ class SnippetGeneratorFailure : public llvm::StringError { // Common code for all benchmark modes. 
class SnippetGenerator { public: - explicit SnippetGenerator(const LLVMState &State); + struct Options { + unsigned MaxConfigsPerOpcode = 1; + }; + + explicit SnippetGenerator(const LLVMState &State, const Options &Opts); virtual ~SnippetGenerator(); @@ -66,6 +70,7 @@ class SnippetGenerator { protected: const LLVMState &State; + const Options Opts; private: // API to be implemented by subclasses. diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp index a5ba24c20f29d..7e27627c98caf 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -36,17 +36,17 @@ void ExegesisTarget::registerTarget(ExegesisTarget *Target) { FirstTarget = Target; } -std::unique_ptr -ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const { +std::unique_ptr ExegesisTarget::createSnippetGenerator( + InstructionBenchmark::ModeE Mode, const LLVMState &State, + const SnippetGenerator::Options &Opts) const { switch (Mode) { case InstructionBenchmark::Unknown: return nullptr; case InstructionBenchmark::Latency: - return createLatencySnippetGenerator(State); + return createLatencySnippetGenerator(State, Opts); case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - return createUopsSnippetGenerator(State); + return createUopsSnippetGenerator(State, Opts); } return nullptr; } @@ -66,14 +66,14 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, return nullptr; } -std::unique_ptr -ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const { - return std::make_unique(State); +std::unique_ptr ExegesisTarget::createLatencySnippetGenerator( + const LLVMState &State, const SnippetGenerator::Options &Opts) const { + return std::make_unique(State, Opts); } -std::unique_ptr -ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const { - return std::make_unique(State); +std::unique_ptr 
ExegesisTarget::createUopsSnippetGenerator( + const LLVMState &State, const SnippetGenerator::Options &Opts) const { + return std::make_unique(State, Opts); } std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 70313a7a2f7ac..511104d5947b0 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -125,7 +125,8 @@ class ExegesisTarget { // Creates a snippet generator for the given mode. std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const; + const LLVMState &State, + const SnippetGenerator::Options &Opts) const; // Creates a benchmark runner for the given mode. std::unique_ptr createBenchmarkRunner(InstructionBenchmark::ModeE Mode, @@ -151,9 +152,9 @@ class ExegesisTarget { // Targets can implement their own snippet generators/benchmarks runners by // implementing these. std::unique_ptr virtual createLatencySnippetGenerator( - const LLVMState &State) const; + const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createUopsSnippetGenerator( - const LLVMState &State) const; + const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode) const; std::unique_ptr virtual createUopsBenchmarkRunner( diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/Uops.h index 23caff2665813..fcfeabe99ee07 100644 --- a/llvm/tools/llvm-exegesis/lib/Uops.h +++ b/llvm/tools/llvm-exegesis/lib/Uops.h @@ -22,7 +22,7 @@ namespace exegesis { class UopsSnippetGenerator : public SnippetGenerator { public: - UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + using SnippetGenerator::SnippetGenerator; ~UopsSnippetGenerator() override; llvm::Expected> diff --git 
a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index ce66610891d0a..1532af8ddec63 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -462,14 +462,16 @@ class ExegesisX86Target : public ExegesisTarget { sizeof(kUnavailableRegisters[0])); } - std::unique_ptr - createLatencySnippetGenerator(const LLVMState &State) const override { - return std::make_unique(State); + std::unique_ptr createLatencySnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique(State, Opts); } - std::unique_ptr - createUopsSnippetGenerator(const LLVMState &State) const override { - return std::make_unique(State); + std::unique_ptr createUopsSnippetGenerator( + const LLVMState &State, + const SnippetGenerator::Options &Opts) const override { + return std::make_unique(State, Opts); } bool matchesArch(llvm::Triple::ArchType Arch) const override { diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 616b7c96232a7..e86dc817cb205 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -95,6 +95,12 @@ static cl::opt cl::desc("number of time to repeat the asm snippet"), cl::cat(BenchmarkOptions), cl::init(10000)); +static cl::opt MaxConfigsPerOpcode( + "max-configs-per-opcode", + cl::desc( + "allow to snippet generator to generate at most that many configs"), + cl::cat(BenchmarkOptions), cl::init(1)); + static cl::opt IgnoreInvalidSchedClass( "ignore-invalid-sched-class", cl::desc("ignore instructions that do not define a sched class"), @@ -214,8 +220,11 @@ generateSnippets(const LLVMState &State, unsigned Opcode, if (InstrDesc.isCall() || InstrDesc.isReturn()) return make_error("Unsupported opcode: isCall/isReturn"); + SnippetGenerator::Options Options; + Options.MaxConfigsPerOpcode = MaxConfigsPerOpcode; const std::unique_ptr 
Generator = - State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State); + State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State, + Options); if (!Generator) llvm::report_fatal_error("cannot create snippet generator"); return Generator->generateConfigurations(Instr, ForbiddenRegs); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp index c5a69250999e0..9eac269ed6d7b 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -45,7 +45,7 @@ class X86SnippetGeneratorTest : public X86TestBase { template class SnippetGeneratorTest : public X86SnippetGeneratorTest { protected: - SnippetGeneratorTest() : Generator(State) {} + SnippetGeneratorTest() : Generator(State, SnippetGenerator::Options()) {} std::vector checkAndGetCodeTemplates(unsigned Opcode) { randomGenerator().seed(0); // Initialize seed. @@ -335,7 +335,8 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) { class FakeSnippetGenerator : public SnippetGenerator { public: - FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + FakeSnippetGenerator(const LLVMState &State, const Options &Opts) + : SnippetGenerator(State, Opts) {} Instruction createInstruction(unsigned Opcode) { return State.getIC().getInstr(Opcode); From 45e0f2437327e753b0cd20fbe6fa25c8e1b226c7 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 8 Oct 2019 14:32:03 +0000 Subject: [PATCH 248/254] [Mips] Emit proper ABI for _mcount calls When -pg option is present than a call to _mcount is inserted into every function. However since the proper ABI was not followed then the generated gmon.out did not give proper results. By inserting needed instructions before every _mcount we can fix this. 
Differential Revision: https://reviews.llvm.org/D68390 llvm-svn: 374055 --- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 45 ++++++++ llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h | 4 + llvm/test/CodeGen/Mips/mcount.ll | 117 ++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 llvm/test/CodeGen/Mips/mcount.ll diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index d9354cadc73b7..65afb3650f800 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -124,6 +124,33 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, return true; } +void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF) { + MachineInstrBuilder MIB(MF, &MI); + if (!Subtarget->isABI_O32()) { // N32, N64 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) + .addDef(Mips::AT_64) + .addUse(Mips::RA_64) + .addUse(Mips::ZERO_64); + // Stops instruction above from being removed later on. + MIB.addUse(Mips::AT_64, RegState::Implicit); + } else { // O32 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) + .addDef(Mips::AT) + .addUse(Mips::RA) + .addUse(Mips::ZERO); + // _mcount pops 2 words from stack. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::ADDiu)) + .addDef(Mips::SP) + .addUse(Mips::SP) + .addImm(-8); + // Stops first instruction above from being removed later on. 
+ MIB.addUse(Mips::AT, RegState::Implicit); + } +} + void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { MF.getInfo()->initGlobalBaseReg(); @@ -150,6 +177,24 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1()) MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); break; + case Mips::JAL: + case Mips::JAL_MM: + if (MI.getOperand(0).isGlobal() && + MI.getOperand(0).getGlobal()->getGlobalIdentifier() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALRPseudo: + case Mips::JALR64Pseudo: + case Mips::JALR16_MM: + if (MI.getOperand(2).isMCSymbol() && + MI.getOperand(2).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALR: + if (MI.getOperand(3).isMCSymbol() && + MI.getOperand(3).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; default: replaceUsesWithZeroReg(MRI, MI); } diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 80ab1ea9f635f..39f665be571e3 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -128,6 +128,10 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { bool trySelect(SDNode *Node) override; + // Emits proper ABI for _mcount profiling calls. 
+ void emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF); + void processFunctionAfterISel(MachineFunction &MF) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, diff --git a/llvm/test/CodeGen/Mips/mcount.ll b/llvm/test/CodeGen/Mips/mcount.ll new file mode 100644 index 0000000000000..e136ae03da566 --- /dev/null +++ b/llvm/test/CodeGen/Mips/mcount.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=MIPS32 +; RUN: llc -march=mips -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS32-PIC +; RUN: llc -march=mips64 < %s | FileCheck %s -check-prefix=MIPS64 +; RUN: llc -march=mips64 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS64-PIC +; RUN: llc -march=mips -mattr=+micromips < %s | FileCheck %s -check-prefix=MIPS32-MM +; RUN: llc -march=mips -relocation-model=pic -mattr=+micromips < %s | FileCheck %s -check-prefix=MIPS32-MM-PIC + +; Test that checks ABI for _mcount calls. 
+ +; Function Attrs: noinline nounwind optnone +define void @foo() #0 { +; MIPS32-LABEL: foo: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -24 +; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: move $1, $ra +; MIPS32-NEXT: jal _mcount +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 24 +; +; MIPS32-PIC-LABEL: foo: +; MIPS32-PIC: # %bb.0: # %entry +; MIPS32-PIC-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-PIC-NEXT: addiu $sp, $sp, -24 +; MIPS32-PIC-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-PIC-NEXT: .cfi_offset 31, -4 +; MIPS32-PIC-NEXT: addu $gp, $2, $25 +; MIPS32-PIC-NEXT: lw $25, %call16(_mcount)($gp) +; MIPS32-PIC-NEXT: move $1, $ra +; MIPS32-PIC-NEXT: .reloc ($tmp0), R_MIPS_JALR, _mcount +; MIPS32-PIC-NEXT: $tmp0: +; MIPS32-PIC-NEXT: jalr $25 +; MIPS32-PIC-NEXT: addiu $sp, $sp, -8 +; MIPS32-PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-PIC-NEXT: jr $ra +; MIPS32-PIC-NEXT: addiu $sp, $sp, 24 +; +; MIPS64-LABEL: foo: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: .cfi_def_cfa_offset 16 +; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-NEXT: .cfi_offset 31, -8 +; MIPS64-NEXT: or $1, $ra, $zero +; MIPS64-NEXT: jal _mcount +; MIPS64-NEXT: nop +; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64-PIC-LABEL: foo: +; MIPS64-PIC: # %bb.0: # %entry +; MIPS64-PIC-NEXT: daddiu $sp, $sp, -16 +; MIPS64-PIC-NEXT: .cfi_def_cfa_offset 16 +; MIPS64-PIC-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64-PIC-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill +; MIPS64-PIC-NEXT: .cfi_offset 31, -8 +; MIPS64-PIC-NEXT: .cfi_offset 28, -16 +; MIPS64-PIC-NEXT: lui 
$1, %hi(%neg(%gp_rel(foo))) +; MIPS64-PIC-NEXT: daddu $1, $1, $25 +; MIPS64-PIC-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(foo))) +; MIPS64-PIC-NEXT: ld $25, %call16(_mcount)($gp) +; MIPS64-PIC-NEXT: or $1, $ra, $zero +; MIPS64-PIC-NEXT: .reloc .Ltmp0, R_MIPS_JALR, _mcount +; MIPS64-PIC-NEXT: .Ltmp0: +; MIPS64-PIC-NEXT: jalr $25 +; MIPS64-PIC-NEXT: nop +; MIPS64-PIC-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload +; MIPS64-PIC-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64-PIC-NEXT: jr $ra +; MIPS64-PIC-NEXT: daddiu $sp, $sp, 16 +; +; MIPS32-MM-LABEL: foo: +; MIPS32-MM: # %bb.0: # %entry +; MIPS32-MM-NEXT: addiu $sp, $sp, -24 +; MIPS32-MM-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-MM-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-MM-NEXT: .cfi_offset 31, -4 +; MIPS32-MM-NEXT: move $1, $ra +; MIPS32-MM-NEXT: jal _mcount +; MIPS32-MM-NEXT: addiu $sp, $sp, -8 +; MIPS32-MM-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-MM-NEXT: jr $ra +; MIPS32-MM-NEXT: addiu $sp, $sp, 24 +; +; MIPS32-MM-PIC-LABEL: foo: +; MIPS32-MM-PIC: # %bb.0: # %entry +; MIPS32-MM-PIC-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-MM-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-MM-PIC-NEXT: addiu $sp, $sp, -24 +; MIPS32-MM-PIC-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-MM-PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32-MM-PIC-NEXT: .cfi_offset 31, -4 +; MIPS32-MM-PIC-NEXT: addu $2, $2, $25 +; MIPS32-MM-PIC-NEXT: lw $25, %call16(_mcount)($2) +; MIPS32-MM-PIC-NEXT: move $gp, $2 +; MIPS32-MM-PIC-NEXT: move $1, $ra +; MIPS32-MM-PIC-NEXT: .reloc ($tmp0), R_MICROMIPS_JALR, _mcount +; MIPS32-MM-PIC-NEXT: $tmp0: +; MIPS32-MM-PIC-NEXT: jalr $25 +; MIPS32-MM-PIC-NEXT: addiu $sp, $sp, -8 +; MIPS32-MM-PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32-MM-PIC-NEXT: jr $ra +; MIPS32-MM-PIC-NEXT: addiu $sp, $sp, 24 +entry: + ret void +} + +attributes #0 = { "instrument-function-entry-inlined"="_mcount" } From c864f73e45ce138040b14d54dd4ad2bdcba396ea Mon Sep 17 00:00:00 2001 From: Louis Dionne 
Date: Tue, 8 Oct 2019 14:53:11 +0000 Subject: [PATCH 249/254] [libc++] TAKE 2: Make system libraries PRIVATE dependencies of libc++ We tried doing that previously (in r373487) and failed (reverted in r373506) because the benchmarks needed to link against system libraries and relied on libc++'s dependencies being propagated. Now that this has been fixed (in r374053), this commit marks the system libraries as PRIVATE dependencies of libc++. llvm-svn: 374056 --- libcxx/src/CMakeLists.txt | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 03bc126be5672..2dad20049a1a1 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -143,40 +143,40 @@ function(cxx_link_system_libraries target) target_add_link_flags_if_supported(${target} PRIVATE "/nodefaultlib") if (LIBCXX_HAS_SYSTEM_LIB) - target_link_libraries(${target} PUBLIC System) + target_link_libraries(${target} PRIVATE System) endif() if (LIBCXX_HAS_PTHREAD_LIB) - target_link_libraries(${target} PUBLIC pthread) + target_link_libraries(${target} PRIVATE pthread) endif() if (LIBCXX_HAS_C_LIB) - target_link_libraries(${target} PUBLIC c) + target_link_libraries(${target} PRIVATE c) endif() if (LIBCXX_HAS_M_LIB) - target_link_libraries(${target} PUBLIC m) + target_link_libraries(${target} PRIVATE m) endif() if (LIBCXX_HAS_RT_LIB) - target_link_libraries(${target} PUBLIC rt) + target_link_libraries(${target} PRIVATE rt) endif() if (LIBCXX_USE_COMPILER_RT) find_compiler_rt_library(builtins LIBCXX_BUILTINS_LIBRARY) if (LIBCXX_BUILTINS_LIBRARY) - target_link_libraries(${target} PUBLIC "${LIBCXX_BUILTINS_LIBRARY}") + target_link_libraries(${target} PRIVATE "${LIBCXX_BUILTINS_LIBRARY}") endif() elseif (LIBCXX_HAS_GCC_S_LIB) - target_link_libraries(${target} PUBLIC gcc_s) + target_link_libraries(${target} PRIVATE gcc_s) endif() if (LIBCXX_HAVE_CXX_ATOMICS_WITH_LIB) - target_link_libraries(${target} PUBLIC 
atomic) + target_link_libraries(${target} PRIVATE atomic) endif() if (MINGW) - target_link_libraries(${target} PUBLIC "${MINGW_LIBRARIES}") + target_link_libraries(${target} PRIVATE "${MINGW_LIBRARIES}") endif() if (LIBCXX_TARGETING_MSVC) @@ -186,13 +186,13 @@ function(cxx_link_system_libraries target) set(LIB_SUFFIX "") endif() - target_link_libraries(${target} PUBLIC ucrt${LIB_SUFFIX}) # Universal C runtime - target_link_libraries(${target} PUBLIC vcruntime${LIB_SUFFIX}) # C++ runtime - target_link_libraries(${target} PUBLIC msvcrt${LIB_SUFFIX}) # C runtime startup files - target_link_libraries(${target} PUBLIC msvcprt${LIB_SUFFIX}) # C++ standard library. Required for exception_ptr internals. + target_link_libraries(${target} PRIVATE ucrt${LIB_SUFFIX}) # Universal C runtime + target_link_libraries(${target} PRIVATE vcruntime${LIB_SUFFIX}) # C++ runtime + target_link_libraries(${target} PRIVATE msvcrt${LIB_SUFFIX}) # C runtime startup files + target_link_libraries(${target} PRIVATE msvcprt${LIB_SUFFIX}) # C++ standard library. Required for exception_ptr internals. # Required for standards-complaint wide character formatting functions # (e.g. `printfw`/`scanfw`) - target_link_libraries(${target} PUBLIC iso_stdio_wide_specifiers) + target_link_libraries(${target} PRIVATE iso_stdio_wide_specifiers) endif() endfunction() From 6b06ead19be79fd6e2d2abdda4c4cbb7c8f3c7c0 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 8 Oct 2019 14:56:20 +0000 Subject: [PATCH 250/254] [OPENMP50]Allow functions in declare variant directive to have different C linkage. After some discussion with OpenMP developers, it was decided that the functions with the different C linkage can be used in declare variant directive. 
llvm-svn: 374057 --- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/Sema/SemaDecl.cpp | 7 ++++--- clang/lib/Sema/SemaOpenMP.cpp | 3 ++- .../test/OpenMP/declare_variant_ast_print.cpp | 19 +++++++++++++++++++ ..._variant_implementation_vendor_codegen.cpp | 12 ++++++++++++ .../test/OpenMP/declare_variant_messages.cpp | 12 ------------ 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a77cbc09e400e..28180ed6d116f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9128,7 +9128,7 @@ class Sema { const PartialDiagnosticAt &NoteCausedDiagIDAt, const PartialDiagnosticAt &NoSupportDiagIDAt, const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported, - bool ConstexprSupported); + bool ConstexprSupported, bool CLinkageMayDiffer); /// Function tries to capture lambda's captured variables in the OpenMP region /// before the original lambda is captured. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index db89e54c51749..4a027d868196d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9685,7 +9685,7 @@ bool Sema::areMultiversionVariantFunctionsCompatible( const PartialDiagnosticAt &NoteCausedDiagIDAt, const PartialDiagnosticAt &NoSupportDiagIDAt, const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported, - bool ConstexprSupported) { + bool ConstexprSupported, bool CLinkageMayDiffer) { enum DoesntSupport { FuncTemplates = 0, VirtFuncs = 1, @@ -9778,7 +9778,7 @@ bool Sema::areMultiversionVariantFunctionsCompatible( if (OldFD->getStorageClass() != NewFD->getStorageClass()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << StorageClass; - if (OldFD->isExternC() != NewFD->isExternC()) + if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC()) return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << Linkage; if (CheckEquivalentExceptionSpec( @@ -9831,7 +9831,8 @@ static bool 
CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD, PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_diff)), /*TemplatesSupported=*/false, - /*ConstexprSupported=*/!IsCPUSpecificCPUDispatchMVType); + /*ConstexprSupported=*/!IsCPUSpecificCPUDispatchMVType, + /*CLinkageMayDiffer=*/false); } /// Check the validity of a multiversion function declaration that is the diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b0fd6aa5fd5fe..ff7e1c004c5c6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5109,7 +5109,8 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, PartialDiagnosticAt(VariantRef->getExprLoc(), PDiag(diag::err_omp_declare_variant_diff) << FD->getLocation()), - /*TemplatesSupported=*/true, /*ConstexprSupported=*/false)) + /*TemplatesSupported=*/true, /*ConstexprSupported=*/false, + /*CLinkageMayDiffer=*/true)) return None; return std::make_pair(FD, cast(DRE)); } diff --git a/clang/test/OpenMP/declare_variant_ast_print.cpp b/clang/test/OpenMP/declare_variant_ast_print.cpp index c79b6b8da1025..984aafac762ef 100644 --- a/clang/test/OpenMP/declare_variant_ast_print.cpp +++ b/clang/test/OpenMP/declare_variant_ast_print.cpp @@ -201,3 +201,22 @@ void bazzzz() { s.foo1(); static_f(); } + +// CHECK: int fn_linkage_variant(); +// CHECK: extern "C" { +// CHECK: #pragma omp declare variant(fn_linkage_variant) match(implementation={vendor(xxx)}) +// CHECK: int fn_linkage(); +// CHECK: } +int fn_linkage_variant(); +extern "C" { +#pragma omp declare variant(fn_linkage_variant) match(implementation = {vendor(xxx)}) +int fn_linkage(); +} + +// CHECK: extern "C" int fn_linkage_variant1() +// CHECK: #pragma omp declare variant(fn_linkage_variant1) match(implementation={vendor(xxx)}) +// CHECK: int fn_linkage1(); +extern "C" int fn_linkage_variant1(); +#pragma omp declare variant(fn_linkage_variant1) match(implementation = {vendor(xxx)}) +int fn_linkage1(); + 
diff --git a/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp b/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp index b0940751b76d7..1c200cca909fe 100644 --- a/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp +++ b/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp @@ -22,6 +22,8 @@ // CHECK-DAG: ret i32 7 // CHECK-DAG: ret i32 82 // CHECK-DAG: ret i32 83 +// CHECK-DAG: ret i32 85 +// CHECK-DAG: ret i32 86 // CHECK-NOT: ret i32 {{1|4|81|84}} #ifndef HEADER @@ -110,4 +112,14 @@ static int prio1_() { return 1; } int int_fn() { return prio1_(); } +int fn_linkage_variant() { return 85; } +extern "C" { +#pragma omp declare variant(fn_linkage_variant) match(implementation = {vendor(llvm)}) +int fn_linkage() { return 1; } +} + +extern "C" int fn_linkage_variant1() { return 86; } +#pragma omp declare variant(fn_linkage_variant1) match(implementation = {vendor(llvm)}) +int fn_linkage1() { return 1; } + #endif // HEADER diff --git a/clang/test/OpenMP/declare_variant_messages.cpp b/clang/test/OpenMP/declare_variant_messages.cpp index aee5544bf2933..f80c93c312d76 100644 --- a/clang/test/OpenMP/declare_variant_messages.cpp +++ b/clang/test/OpenMP/declare_variant_messages.cpp @@ -165,18 +165,6 @@ inline int fn_inline_variant1(); #pragma omp declare variant(fn_inline_variant1) match(xxx = {}) int fn_inline1(); -int fn_linkage_variant(); -extern "C" { -// expected-error@+1 {{function with '#pragma omp declare variant' has a different linkage}} -#pragma omp declare variant(fn_linkage_variant) match(xxx = {}) -int fn_linkage(); -} - -extern "C" int fn_linkage_variant1(); -// expected-error@+1 {{function with '#pragma omp declare variant' has a different linkage}} -#pragma omp declare variant(fn_linkage_variant1) match(xxx = {}) -int fn_linkage1(); - auto fn_deduced_variant() { return 0; } #pragma omp declare variant(fn_deduced_variant) match(xxx = {}) int fn_deduced(); From 
5d566c5a46aeaa1fa0e5c0b823c9d5f84036dc9a Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Tue, 8 Oct 2019 15:07:36 +0000 Subject: [PATCH 251/254] [TextAPI] Introduce TBDv4 Summary: This format introduces new features and platforms The motivation for this format is to support more than 1 platform since previous versions only supported additional architectures and 1 platform, for example ios + ios-simulator and macCatalyst. Reviewers: ributzka, steven_wu Reviewed By: ributzka Subscribers: mgorny, hiraditya, mgrang, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67529 llvm-svn: 374058 --- .../llvm/TextAPI/MachO/InterfaceFile.h | 3 + llvm/include/llvm/TextAPI/MachO/Symbol.h | 11 +- llvm/include/llvm/TextAPI/MachO/Target.h | 2 + llvm/lib/TextAPI/MachO/Target.cpp | 40 +- llvm/lib/TextAPI/MachO/TextStub.cpp | 498 +++++++++++++++- llvm/lib/TextAPI/MachO/TextStubCommon.cpp | 25 +- llvm/unittests/TextAPI/CMakeLists.txt | 1 + llvm/unittests/TextAPI/TextStubV4Tests.cpp | 558 ++++++++++++++++++ 8 files changed, 1104 insertions(+), 34 deletions(-) create mode 100644 llvm/unittests/TextAPI/TextStubV4Tests.cpp diff --git a/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h b/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h index 8bb6b5ff06665..bd434e04b693a 100644 --- a/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h +++ b/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h @@ -67,6 +67,9 @@ enum FileType : unsigned { /// Text-based stub file (.tbd) version 3.0 TBD_V3 = 1U << 2, + /// Text-based stub file (.tbd) version 4.0 + TBD_V4 = 1U << 3, + All = ~0U, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All), diff --git a/llvm/include/llvm/TextAPI/MachO/Symbol.h b/llvm/include/llvm/TextAPI/MachO/Symbol.h index b6444fbd78ff7..1b1632c599c4a 100644 --- a/llvm/include/llvm/TextAPI/MachO/Symbol.h +++ b/llvm/include/llvm/TextAPI/MachO/Symbol.h @@ -38,7 +38,10 @@ enum class SymbolFlags : uint8_t { /// Undefined Undefined = 1U << 3, - 
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined), + /// Rexported + Rexported = 1U << 4, + + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported), }; // clang-format on @@ -50,7 +53,7 @@ enum class SymbolKind : uint8_t { ObjectiveCInstanceVariable, }; -using TargetList = SmallVector; +using TargetList = SmallVector; class Symbol { public: Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags) @@ -81,6 +84,10 @@ class Symbol { return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined; } + bool isReexported() const { + return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported; + } + using const_target_iterator = TargetList::const_iterator; using const_target_range = llvm::iterator_range; const_target_range targets() const { return {Targets}; } diff --git a/llvm/include/llvm/TextAPI/MachO/Target.h b/llvm/include/llvm/TextAPI/MachO/Target.h index 74e900d812f52..5fe44cb7d366f 100644 --- a/llvm/include/llvm/TextAPI/MachO/Target.h +++ b/llvm/include/llvm/TextAPI/MachO/Target.h @@ -29,6 +29,8 @@ class Target { explicit Target(const llvm::Triple &Triple) : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformKind(Triple)) {} + static llvm::Expected create(StringRef Target); + operator std::string() const; Architecture Arch; diff --git a/llvm/lib/TextAPI/MachO/Target.cpp b/llvm/lib/TextAPI/MachO/Target.cpp index 3052aa53ac233..52fb339c2408c 100644 --- a/llvm/lib/TextAPI/MachO/Target.cpp +++ b/llvm/lib/TextAPI/MachO/Target.cpp @@ -17,6 +17,44 @@ namespace llvm { namespace MachO { +Expected Target::create(StringRef TargetValue) { + auto Result = TargetValue.split('-'); + auto ArchitectureStr = Result.first; + auto Architecture = getArchitectureFromName(ArchitectureStr); + if (Architecture == AK_unknown) + return make_error("invalid architecture", + inconvertibleErrorCode()); + auto PlatformStr = Result.second; + PlatformKind Platform; + Platform = StringSwitch(PlatformStr) + .Case("macos", PlatformKind::macOS) + .Case("ios", 
PlatformKind::iOS) + .Case("tvos", PlatformKind::tvOS) + .Case("watchos", PlatformKind::watchOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("maccatalyst", PlatformKind::macCatalyst) + .Case("ios-simulator", PlatformKind::iOSSimulator) + .Case("tvos-simulator", PlatformKind::tvOSSimulator) + .Case("watchos-simulator", PlatformKind::watchOSSimulator) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::unknown) { + if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) { + PlatformStr = PlatformStr.drop_front().drop_back(); + unsigned long long RawValue; + if (PlatformStr.getAsInteger(10, RawValue)) + return make_error("invalid platform number", + inconvertibleErrorCode()); + + Platform = (PlatformKind)RawValue; + } + return make_error("invalid platform", + inconvertibleErrorCode()); + } + + return Target{Architecture, Platform}; +} + Target::operator std::string() const { return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")") .str(); @@ -42,4 +80,4 @@ ArchitectureSet mapToArchitectureSet(ArrayRef Targets) { } } // end namespace MachO. -} // end namespace llvm. \ No newline at end of file +} // end namespace llvm. diff --git a/llvm/lib/TextAPI/MachO/TextStub.cpp b/llvm/lib/TextAPI/MachO/TextStub.cpp index 78c9f54ba22dd..e37ce15e5c1ef 100644 --- a/llvm/lib/TextAPI/MachO/TextStub.cpp +++ b/llvm/lib/TextAPI/MachO/TextStub.cpp @@ -147,6 +147,58 @@ Each undefineds section is defined as following: objc-ivars: [] # Optional: List of Objective C Instance Variables weak-ref-symbols: [] # Optional: List of weak defined symbols */ + +/* + + YAML Format specification. + +--- !tapi-tbd +tbd-version: 4 # The tbd version for format +targets: [ armv7-ios, x86_64-maccatalyst ] # The list of applicable tapi supported target triples +uuids: # Optional: List of target and UUID pairs. + - target: armv7-ios + value: ... + - target: x86_64-maccatalyst + value: ... 
+flags: [] # Optional: +install-name: /u/l/libfoo.dylib # +current-version: 1.2.3 # Optional: defaults to 1.0 +compatibility-version: 1.0 # Optional: defaults to 1.0 +swift-abi-version: 0 # Optional: defaults to 0 +parent-umbrella: # Optional: +allowable-clients: + - targets: [ armv7-ios ] # Optional: + clients: [ clientA ] +exports: # List of export sections +... +re-exports: # List of reexport sections +... +undefineds: # List of undefineds sections +... + +Each export and reexport section is defined as following: + +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symA ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance + # Variables + weak-symbols: [] # Optional: List of weak defined symbols + thread-local-symbols: [] # Optional: List of thread local symbols +- targets: [ arm64-macos, x86_64-maccatalyst ] # Optional: Targets for applicable additional symbols + symbols: [ _symB ] # Optional: List of symbols + +Each undefineds section is defined as following: +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symC ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance Variables + weak-symbols: [] # Optional: List of weak defined symbols +*/ // clang-format on using namespace llvm; @@ -175,6 +227,38 @@ struct UndefinedSection { std::vector WeakRefSymbols; }; +// Sections for direct target mapping in TBDv4 +struct SymbolSection { + TargetList Targets; + std::vector Symbols; + std::vector Classes; + std::vector ClassEHs; + std::vector Ivars; + std::vector WeakSymbols; + std::vector TlvSymbols; +}; + +struct MetadataSection { + enum Option { Clients, 
Libraries }; + std::vector Targets; + std::vector Values; +}; + +struct UmbrellaSection { + std::vector Targets; + std::string Umbrella; +}; + +// UUID's for TBDv4 are mapped to target not arch +struct UUIDv4 { + Target TargetID; + std::string Value; + + UUIDv4() = default; + UUIDv4(const Target &TargetID, const std::string &Value) + : TargetID(TargetID), Value(Value) {} +}; + // clang-format off enum TBDFlags : unsigned { None = 0U, @@ -189,6 +273,12 @@ enum TBDFlags : unsigned { LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture) LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection) LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection) +// Specific to TBDv4 +LLVM_YAML_IS_SEQUENCE_VECTOR(SymbolSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(MetadataSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(UmbrellaSection) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Target) +LLVM_YAML_IS_SEQUENCE_VECTOR(UUIDv4) namespace llvm { namespace yaml { @@ -231,6 +321,49 @@ template <> struct MappingTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, SymbolSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapOptional("symbols", Section.Symbols); + IO.mapOptional("objc-classes", Section.Classes); + IO.mapOptional("objc-eh-types", Section.ClassEHs); + IO.mapOptional("objc-ivars", Section.Ivars); + IO.mapOptional("weak-symbols", Section.WeakSymbols); + IO.mapOptional("thread-local-symbols", Section.TlvSymbols); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UmbrellaSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapRequired("umbrella", Section.Umbrella); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UUIDv4 &UUID) { + IO.mapRequired("target", UUID.TargetID); + IO.mapRequired("value", UUID.Value); + } +}; + +template <> +struct MappingContextTraits { + static void mapping(IO &IO, MetadataSection &Section, + MetadataSection::Option &OptionKind) { + IO.mapRequired("targets", Section.Targets); + switch 
(OptionKind) { + case MetadataSection::Option::Clients: + IO.mapRequired("clients", Section.Values); + return; + case MetadataSection::Option::Libraries: + IO.mapRequired("libraries", Section.Values); + return; + } + llvm_unreachable("unexpected option for metadata"); + } +}; + template <> struct ScalarBitSetTraits { static void bitset(IO &IO, TBDFlags &Flags) { IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace); @@ -240,6 +373,55 @@ template <> struct ScalarBitSetTraits { } }; +template <> struct ScalarTraits { + static void output(const Target &Value, void *, raw_ostream &OS) { + OS << Value.Arch << "-"; + switch (Value.Platform) { + default: + OS << "unknown"; + break; + case PlatformKind::macOS: + OS << "macos"; + break; + case PlatformKind::iOS: + OS << "ios"; + break; + case PlatformKind::tvOS: + OS << "tvos"; + break; + case PlatformKind::watchOS: + OS << "watchos"; + break; + case PlatformKind::bridgeOS: + OS << "bridgeos"; + break; + case PlatformKind::macCatalyst: + OS << "maccatalyst"; + break; + case PlatformKind::iOSSimulator: + OS << "ios-simulator"; + break; + case PlatformKind::tvOSSimulator: + OS << "tvos-simulator"; + break; + case PlatformKind::watchOSSimulator: + OS << "watchos-simulator"; + break; + } + } + + static StringRef input(StringRef Scalar, void *, Target &Value) { + auto Result = Target::create(Scalar); + if (!Result) + return toString(Result.takeError()); + + Value = *Result; + return {}; + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + template <> struct MappingTraits { struct NormalizedTBD { explicit NormalizedTBD(IO &IO) {} @@ -555,71 +737,339 @@ template <> struct MappingTraits { std::vector Undefineds; }; + static void setFileTypeForInput(TextAPIContext *Ctx, IO &IO) { + if (IO.mapTag("!tapi-tbd", false)) + Ctx->FileKind = FileType::TBD_V4; + else if (IO.mapTag("!tapi-tbd-v3", false)) + Ctx->FileKind = FileType::TBD_V3; + else if (IO.mapTag("!tapi-tbd-v2", false)) + Ctx->FileKind = 
FileType::TBD_V2; + else if (IO.mapTag("!tapi-tbd-v1", false) || + IO.mapTag("tag:yaml.org,2002:map", false)) + Ctx->FileKind = FileType::TBD_V1; + else { + Ctx->FileKind = FileType::Invalid; + return; + } + } + static void mapping(IO &IO, const InterfaceFile *&File) { auto *Ctx = reinterpret_cast(IO.getContext()); assert((!Ctx || !IO.outputting() || (Ctx && Ctx->FileKind != FileType::Invalid)) && "File type is not set in YAML context"); - MappingNormalization Keys(IO, File); - // prope file type when reading. if (!IO.outputting()) { - if (IO.mapTag("!tapi-tbd-v3", false)) - Ctx->FileKind = FileType::TBD_V3; - else if (IO.mapTag("!tapi-tbd-v2", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v1", false) || - IO.mapTag("tag:yaml.org,2002:map", false)) - Ctx->FileKind = FileType::TBD_V1; - else { + setFileTypeForInput(Ctx, IO); + switch (Ctx->FileKind) { + default: + break; + case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::Invalid: IO.setError("unsupported file type"); return; } - } - - // Set file type when writing. - if (IO.outputting()) { + } else { + // Set file type when writing. switch (Ctx->FileKind) { default: llvm_unreachable("unexpected file type"); - case FileType::TBD_V1: - // Don't write the tag into the .tbd file for TBD v1. 
+ case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::TBD_V3: + IO.mapTag("!tapi-tbd-v3", true); break; case FileType::TBD_V2: IO.mapTag("!tapi-tbd-v2", true); break; - case FileType::TBD_V3: - IO.mapTag("!tapi-tbd-v3", true); + case FileType::TBD_V1: + // Don't write the tag into the .tbd file for TBD v1 break; } } + mapKeysToValues(Ctx->FileKind, IO, File); + } + + using SectionList = std::vector; + struct NormalizedTBD_V4 { + explicit NormalizedTBD_V4(IO &IO) {} + NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + TBDVersion = Ctx->FileKind >> 1; + Targets.insert(Targets.begin(), File->targets().begin(), + File->targets().end()); + for (const auto &IT : File->uuids()) + UUIDs.emplace_back(IT.first, IT.second); + InstallName = File->getInstallName(); + CurrentVersion = File->getCurrentVersion(); + CompatibilityVersion = File->getCompatibilityVersion(); + SwiftVersion = File->getSwiftABIVersion(); + + Flags = TBDFlags::None; + if (!File->isApplicationExtensionSafe()) + Flags |= TBDFlags::NotApplicationExtensionSafe; + if (!File->isTwoLevelNamespace()) + Flags |= TBDFlags::FlatNamespace; + + if (File->isInstallAPI()) + Flags |= TBDFlags::InstallAPI; + + { + using TargetList = SmallVector; + std::map valueToTargetList; + for (const auto &it : File->umbrellas()) + valueToTargetList[it.second].emplace_back(it.first); + + for (const auto &it : valueToTargetList) { + UmbrellaSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + it.second.begin(), it.second.end()); + CurrentSection.Umbrella = it.first; + ParentUmbrellas.emplace_back(std::move(CurrentSection)); + } + } + + assignTargetsToLibrary(File->allowableClients(), AllowableClients); + assignTargetsToLibrary(File->reexportedLibraries(), ReexportedLibraries); + + auto handleSymbols = + [](SectionList &CurrentSections, + InterfaceFile::const_filtered_symbol_range Symbols, + 
std::function Pred) { + using TargetList = SmallVector; + std::set TargetSet; + std::map SymbolToTargetList; + for (const auto *Symbol : Symbols) { + if (!Pred(Symbol)) + continue; + TargetList Targets(Symbol->targets()); + SymbolToTargetList[Symbol] = Targets; + TargetSet.emplace(std::move(Targets)); + } + for (const auto &TargetIDs : TargetSet) { + SymbolSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + TargetIDs.begin(), TargetIDs.end()); + + for (const auto &IT : SymbolToTargetList) { + if (IT.second != TargetIDs) + continue; + + const auto *Symbol = IT.first; + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + if (Symbol->isWeakDefined()) + CurrentSection.WeakSymbols.emplace_back(Symbol->getName()); + else if (Symbol->isThreadLocalValue()) + CurrentSection.TlvSymbols.emplace_back(Symbol->getName()); + else + CurrentSection.Symbols.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClass: + CurrentSection.Classes.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClassEHType: + CurrentSection.ClassEHs.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCInstanceVariable: + CurrentSection.Ivars.emplace_back(Symbol->getName()); + break; + } + } + sort(CurrentSection.Symbols); + sort(CurrentSection.Classes); + sort(CurrentSection.ClassEHs); + sort(CurrentSection.Ivars); + sort(CurrentSection.WeakSymbols); + sort(CurrentSection.TlvSymbols); + CurrentSections.emplace_back(std::move(CurrentSection)); + } + }; + + handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) { + return !Symbol->isReexported(); + }); + handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) { + return Symbol->isReexported(); + }); + handleSymbols(Undefineds, File->undefineds(), + [](const Symbol *Symbol) { return true; }); + } + + const InterfaceFile *denormalize(IO &IO) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + + auto *File = new 
InterfaceFile; + File->setPath(Ctx->Path); + File->setFileType(Ctx->FileKind); + for (auto &id : UUIDs) + File->addUUID(id.TargetID, id.Value); + File->addTargets(Targets); + File->setInstallName(InstallName); + File->setCurrentVersion(CurrentVersion); + File->setCompatibilityVersion(CompatibilityVersion); + File->setSwiftABIVersion(SwiftVersion); + for (const auto &CurrentSection : ParentUmbrellas) + for (const auto &target : CurrentSection.Targets) + File->addParentUmbrella(target, CurrentSection.Umbrella); + File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace)); + File->setApplicationExtensionSafe( + !(Flags & TBDFlags::NotApplicationExtensionSafe)); + File->setInstallAPI(Flags & TBDFlags::InstallAPI); + + for (const auto &CurrentSection : AllowableClients) { + for (const auto &lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addAllowableClient(lib, Target); + } + + for (const auto &CurrentSection : ReexportedLibraries) { + for (const auto &Lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addReexportedLibrary(Lib, Target); + } + + auto handleSymbols = [File](const SectionList &CurrentSections, + SymbolFlags Flag = SymbolFlags::None) { + for (const auto &CurrentSection : CurrentSections) { + for (auto &sym : CurrentSection.Symbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, Flag); + + for (auto &sym : CurrentSection.Classes) + File->addSymbol(SymbolKind::ObjectiveCClass, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.ClassEHs) + File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.Ivars) + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.WeakSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets); + for (auto &sym : CurrentSection.TlvSymbols) + 
File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, + SymbolFlags::ThreadLocalValue); + } + }; + + handleSymbols(Exports); + handleSymbols(Reexports, SymbolFlags::Rexported); + handleSymbols(Undefineds, SymbolFlags::Undefined); + + return File; + } + + unsigned TBDVersion; + std::vector UUIDs; + TargetList Targets; + StringRef InstallName; + PackedVersion CurrentVersion; + PackedVersion CompatibilityVersion; + SwiftVersion SwiftVersion{0}; + std::vector AllowableClients; + std::vector ReexportedLibraries; + TBDFlags Flags{TBDFlags::None}; + std::vector ParentUmbrellas; + SectionList Exports; + SectionList Reexports; + SectionList Undefineds; + + private: + using TargetList = SmallVector; + void assignTargetsToLibrary(const std::vector &Libraries, + std::vector &Section) { + std::set targetSet; + std::map valueToTargetList; + for (const auto &library : Libraries) { + TargetList targets(library.targets()); + valueToTargetList[&library] = targets; + targetSet.emplace(std::move(targets)); + } + + for (const auto &targets : targetSet) { + MetadataSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + targets.begin(), targets.end()); + + for (const auto &it : valueToTargetList) { + if (it.second != targets) + continue; + + CurrentSection.Values.emplace_back(it.first->getInstallName()); + } + llvm::sort(CurrentSection.Values); + Section.emplace_back(std::move(CurrentSection)); + } + } + }; + + static void mapKeysToValues(FileType FileKind, IO &IO, + const InterfaceFile *&File) { + MappingNormalization Keys(IO, File); IO.mapRequired("archs", Keys->Architectures); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("uuids", Keys->UUIDs); IO.mapRequired("platform", Keys->Platforms); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("flags", Keys->Flags, TBDFlags::None); IO.mapRequired("install-name", Keys->InstallName); 
IO.mapOptional("current-version", Keys->CurrentVersion, PackedVersion(1, 0, 0)); IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, PackedVersion(1, 0, 0)); - if (Ctx->FileKind != FileType::TBD_V3) + if (FileKind != FileType::TBD_V3) IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0)); else IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); IO.mapOptional("objc-constraint", Keys->ObjCConstraint, - (Ctx->FileKind == FileType::TBD_V1) + (FileKind == FileType::TBD_V1) ? ObjCConstraintType::None : ObjCConstraintType::Retain_Release); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef()); IO.mapOptional("exports", Keys->Exports); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("undefineds", Keys->Undefineds); } + + static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) { + MappingNormalization Keys(IO, + File); + IO.mapTag("!tapi-tbd", true); + IO.mapRequired("tbd-version", Keys->TBDVersion); + IO.mapRequired("targets", Keys->Targets); + IO.mapOptional("uuids", Keys->UUIDs); + IO.mapOptional("flags", Keys->Flags, TBDFlags::None); + IO.mapRequired("install-name", Keys->InstallName); + IO.mapOptional("current-version", Keys->CurrentVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("swift-abi-version", Keys->SwiftVersion, SwiftVersion(0)); + IO.mapOptional("parent-umbrella", Keys->ParentUmbrellas); + auto OptionKind = MetadataSection::Option::Clients; + IO.mapOptionalWithContext("allowable-clients", Keys->AllowableClients, + OptionKind); + OptionKind = MetadataSection::Option::Libraries; + IO.mapOptionalWithContext("reexported-libraries", Keys->ReexportedLibraries, + OptionKind); + IO.mapOptional("exports", Keys->Exports); + IO.mapOptional("reexports", 
Keys->Reexports); + IO.mapOptional("undefineds", Keys->Undefineds); + } }; template <> diff --git a/llvm/lib/TextAPI/MachO/TextStubCommon.cpp b/llvm/lib/TextAPI/MachO/TextStubCommon.cpp index cfd9ac8d0cf6f..183c5d5a93b03 100644 --- a/llvm/lib/TextAPI/MachO/TextStubCommon.cpp +++ b/llvm/lib/TextAPI/MachO/TextStubCommon.cpp @@ -172,14 +172,25 @@ void ScalarTraits::output(const SwiftVersion &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, +StringRef ScalarTraits::input(StringRef Scalar, void *IO, SwiftVersion &Value) { - Value = StringSwitch(Scalar) - .Case("1.0", 1) - .Case("1.1", 2) - .Case("2.0", 3) - .Case("3.0", 4) - .Default(0); + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Ctx->FileKind == FileType::TBD_V4) { + if (Scalar.getAsInteger(10, Value)) + return "invalid Swift ABI version."; + return {}; + } else { + Value = StringSwitch(Scalar) + .Case("1.0", 1) + .Case("1.1", 2) + .Case("2.0", 3) + .Case("3.0", 4) + .Default(0); + } + if (Value != SwiftVersion(0)) return {}; diff --git a/llvm/unittests/TextAPI/CMakeLists.txt b/llvm/unittests/TextAPI/CMakeLists.txt index 7f49df43ac3d4..775ec2f1d1e88 100644 --- a/llvm/unittests/TextAPI/CMakeLists.txt +++ b/llvm/unittests/TextAPI/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_unittest(TextAPITests TextStubV1Tests.cpp TextStubV2Tests.cpp TextStubV3Tests.cpp + TextStubV4Tests.cpp ) target_link_libraries(TextAPITests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/TextAPI/TextStubV4Tests.cpp b/llvm/unittests/TextAPI/TextStubV4Tests.cpp new file mode 100644 index 0000000000000..1c8e7612df1ad --- /dev/null +++ b/llvm/unittests/TextAPI/TextStubV4Tests.cpp @@ -0,0 +1,558 @@ +//===-- TextStubV4Tests.cpp - TBD V4 File Test ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===/ +#include "llvm/TextAPI/MachO/InterfaceFile.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" +#include "llvm/TextAPI/MachO/TextAPIWriter.h" +#include "gtest/gtest.h" +#include +#include + +using namespace llvm; +using namespace llvm::MachO; + +struct ExampleSymbol { + SymbolKind Kind; + std::string Name; + bool WeakDefined; + bool ThreadLocalValue; +}; +using ExampleSymbolSeq = std::vector; +using UUIDs = std::vector>; + +inline bool operator<(const ExampleSymbol &LHS, const ExampleSymbol &RHS) { + return std::tie(LHS.Kind, LHS.Name) < std::tie(RHS.Kind, RHS.Name); +} + +inline bool operator==(const ExampleSymbol &LHS, const ExampleSymbol &RHS) { + return std::tie(LHS.Kind, LHS.Name, LHS.WeakDefined, LHS.ThreadLocalValue) == + std::tie(RHS.Kind, RHS.Name, RHS.WeakDefined, RHS.ThreadLocalValue); +} + +static ExampleSymbol TBDv4ExportedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symA", false, false}, + {SymbolKind::GlobalSymbol, "_symAB", false, false}, + {SymbolKind::GlobalSymbol, "_symB", false, false}, +}; + +static ExampleSymbol TBDv4ReexportedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symC", false, false}, +}; + +static ExampleSymbol TBDv4UndefinedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symD", false, false}, +}; + +namespace TBDv4 { + +TEST(TBDv4, ReadFile) { + static const char tbd_v4_file[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + "uuids:\n" + " - target: i386-macos\n" + " value: 00000000-0000-0000-0000-000000000000\n" + " - target: x86_64-macos\n" + " value: 11111111-1111-1111-1111-111111111111\n" + " - target: x86_64-ios\n" + " value: 11111111-1111-1111-1111-111111111111\n" + "flags: [ flat_namespace, installapi ]\n" + "install-name: Umbrella.framework/Umbrella\n" + "current-version: 
1.2.3\n" + "compatibility-version: 1.2\n" + "swift-abi-version: 5\n" + "parent-umbrella:\n" + " - targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + " umbrella: System\n" + "allowable-clients:\n" + " - targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + " clients: [ ClientA ]\n" + "reexported-libraries:\n" + " - targets: [ i386-macos ]\n" + " libraries: [ /System/Library/Frameworks/A.framework/A ]\n" + "exports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symA ]\n" + " objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + " - targets: [ x86_64-ios ]\n" + " symbols: [_symB]\n" + " - targets: [ x86_64-macos, x86_64-ios ]\n" + " symbols: [_symAB]\n" + "reexports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [_symC]\n" + " objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + "undefineds:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symD ]\n" + " objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_v4_file, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + PlatformSet Platforms; + Platforms.insert(PlatformKind::macOS); + Platforms.insert(PlatformKind::iOS); + auto Archs = AK_i386 | AK_x86_64; + TargetList Targets = { + Target(AK_i386, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::iOS), + }; + UUIDs uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"}, + {Targets[1], "11111111-1111-1111-1111-111111111111"}, + {Targets[2], "11111111-1111-1111-1111-111111111111"}}; + EXPECT_EQ(Archs, File->getArchitectures()); + EXPECT_EQ(uuids, File->uuids()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : 
File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); + EXPECT_EQ(std::string("Umbrella.framework/Umbrella"), File->getInstallName()); + EXPECT_EQ(PackedVersion(1, 2, 3), File->getCurrentVersion()); + EXPECT_EQ(PackedVersion(1, 2, 0), File->getCompatibilityVersion()); + EXPECT_EQ(5U, File->getSwiftABIVersion()); + EXPECT_FALSE(File->isTwoLevelNamespace()); + EXPECT_TRUE(File->isApplicationExtensionSafe()); + EXPECT_TRUE(File->isInstallAPI()); + InterfaceFileRef client("ClientA", Targets); + InterfaceFileRef reexport("/System/Library/Frameworks/A.framework/A", + {Targets[0]}); + EXPECT_EQ(1U, File->allowableClients().size()); + EXPECT_EQ(client, File->allowableClients().front()); + EXPECT_EQ(1U, File->reexportedLibraries().size()); + EXPECT_EQ(reexport, File->reexportedLibraries().front()); + + ExampleSymbolSeq Exports, Reexports, Undefineds; + ExampleSymbol temp; + for (const auto *Sym : File->symbols()) { + temp = ExampleSymbol{Sym->getKind(), Sym->getName(), Sym->isWeakDefined(), + Sym->isThreadLocalValue()}; + EXPECT_FALSE(Sym->isWeakReferenced()); + if (Sym->isUndefined()) + Undefineds.emplace_back(std::move(temp)); + else + Sym->isReexported() ? 
Reexports.emplace_back(std::move(temp)) + : Exports.emplace_back(std::move(temp)); + } + llvm::sort(Exports.begin(), Exports.end()); + llvm::sort(Reexports.begin(), Reexports.end()); + llvm::sort(Undefineds.begin(), Undefineds.end()); + + EXPECT_EQ(sizeof(TBDv4ExportedSymbols) / sizeof(ExampleSymbol), + Exports.size()); + EXPECT_EQ(sizeof(TBDv4ReexportedSymbols) / sizeof(ExampleSymbol), + Reexports.size()); + EXPECT_EQ(sizeof(TBDv4UndefinedSymbols) / sizeof(ExampleSymbol), + Undefineds.size()); + EXPECT_TRUE(std::equal(Exports.begin(), Exports.end(), + std::begin(TBDv4ExportedSymbols))); + EXPECT_TRUE(std::equal(Reexports.begin(), Reexports.end(), + std::begin(TBDv4ReexportedSymbols))); + EXPECT_TRUE(std::equal(Undefineds.begin(), Undefineds.end(), + std::begin(TBDv4UndefinedSymbols))); +} + +TEST(TBDv4, WriteFile) { + static const char tbd_v4_file[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-macos, x86_64-ios-simulator ]\n" + "uuids:\n" + " - target: i386-macos\n" + " value: 00000000-0000-0000-0000-000000000000\n" + " - target: x86_64-ios-simulator\n" + " value: 11111111-1111-1111-1111-111111111111\n" + "flags: [ installapi ]\n" + "install-name: 'Umbrella.framework/Umbrella'\n" + "current-version: 1.2.3\n" + "compatibility-version: 0\n" + "swift-abi-version: 5\n" + "parent-umbrella:\n" + " - targets: [ i386-macos, x86_64-ios-simulator ]\n" + " umbrella: System\n" + "allowable-clients:\n" + " - targets: [ i386-macos ]\n" + " clients: [ ClientA ]\n" + "exports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symA ]\n" + " objc-classes: [ Class1 ]\n" + " weak-symbols: [ _symC ]\n" + " - targets: [ x86_64-ios-simulator ]\n" + " symbols: [ _symB ]\n" + "...\n"; + + InterfaceFile File; + TargetList Targets = { + Target(AK_i386, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::iOSSimulator), + }; + UUIDs uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"}, + {Targets[1], "11111111-1111-1111-1111-111111111111"}}; + 
File.setInstallName("Umbrella.framework/Umbrella"); + File.setFileType(FileType::TBD_V4); + File.addTargets(Targets); + File.addUUID(uuids[0].first, uuids[0].second); + File.addUUID(uuids[1].first, uuids[1].second); + File.setCurrentVersion(PackedVersion(1, 2, 3)); + File.setTwoLevelNamespace(); + File.setInstallAPI(true); + File.setApplicationExtensionSafe(true); + File.setSwiftABIVersion(5); + File.addAllowableClient("ClientA", Targets[0]); + File.addParentUmbrella(Targets[0], "System"); + File.addParentUmbrella(Targets[1], "System"); + File.addSymbol(SymbolKind::GlobalSymbol, "_symA", {Targets[0]}); + File.addSymbol(SymbolKind::GlobalSymbol, "_symB", {Targets[1]}); + File.addSymbol(SymbolKind::GlobalSymbol, "_symC", {Targets[0]}, + SymbolFlags::WeakDefined); + File.addSymbol(SymbolKind::ObjectiveCClass, "Class1", {Targets[0]}); + + SmallString<4096> Buffer; + raw_svector_ostream OS(Buffer); + auto Result = TextAPIWriter::writeToStream(OS, File); + EXPECT_FALSE(Result); + EXPECT_STREQ(tbd_v4_file, Buffer.c_str()); +} + +TEST(TBDv4, MultipleTargets) { + static const char tbd_multiple_targets[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-maccatalyst, x86_64-tvos, arm64-ios ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_multiple_targets, "Test.tbd")); + EXPECT_TRUE(!!Result); + PlatformSet Platforms; + Platforms.insert(PlatformKind::macCatalyst); + Platforms.insert(PlatformKind::tvOS); + Platforms.insert(PlatformKind::iOS); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(AK_x86_64 | AK_arm64 | AK_i386, File->getArchitectures()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); +} + +TEST(TBDv4, MultipleTargetsSameArch) { + static const char tbd_targets_same_arch[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ 
x86_64-maccatalyst, x86_64-tvos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_targets_same_arch, "Test.tbd")); + EXPECT_TRUE(!!Result); + PlatformSet Platforms; + Platforms.insert(PlatformKind::tvOS); + Platforms.insert(PlatformKind::macCatalyst); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); +} + +TEST(TBDv4, MultipleTargetsSamePlatform) { + static const char tbd_multiple_targets_same_platform[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ arm64-ios, armv7k-ios ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_multiple_targets_same_platform, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(AK_arm64 | AK_armv7k, File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_maccatalyst) { + static const char tbd_target_maccatalyst[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-maccatalyst ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_target_maccatalyst, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::macCatalyst, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_ios) { + static const char tbd_target_x86_ios[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-ios ]\n" + 
"install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_target_x86_ios, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_arm_bridgeOS) { + static const char tbd_platform_bridgeos[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ armv7k-bridgeos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_platform_bridgeos, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::bridgeOS, *File->getPlatforms().begin()); + EXPECT_EQ(ArchitectureSet(AK_armv7k), File->getArchitectures()); +} + +TEST(TBDv4, Target_x86_macos) { + static const char tbd_x86_macos[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_x86_macos, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::macOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_ios_simulator) { + static const char tbd_x86_ios_sim[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-ios-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_x86_ios_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + 
EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_tvos_simulator) { + static const char tbd_x86_tvos_sim[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-tvos-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_x86_tvos_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::tvOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_i386_watchos_simulator) { + static const char tbd_i386_watchos_sim[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-watchos-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_i386_watchos_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_i386), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::watchOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Swift_1) { + static const char tbd_swift_1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 1\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_1, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(1U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_2) { + static const char tbd_v1_swift_2[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + 
"install-name: Test.dylib\n" + "swift-abi-version: 2\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_v1_swift_2, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(2U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_5) { + static const char tbd_swift_5[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 5\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_5, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(5U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_99) { + static const char tbd_swift_99[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 99\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_99, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(99U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, InvalidArchitecture) { + static const char tbd_file_unknown_architecture[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ foo-macos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_file_unknown_architecture, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + ASSERT_TRUE(errorMessage.compare(0, 15, "malformed file\n") == 0); +} + +TEST(TBDv4, InvalidPlatform) { + static const char tbd_file_invalid_platform[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-maos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_file_invalid_platform, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto 
errorMessage = toString(Result.takeError()); + ASSERT_TRUE(errorMessage.compare(0, 15, "malformed file\n") == 0); +} + +TEST(TBDv4, MalformedFile1) { + static const char malformed_file1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(malformed_file1, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + ASSERT_EQ("malformed file\nTest.tbd:2:1: error: missing required key " + "'targets'\ntbd-version: 4\n^\n", + errorMessage); +} + +TEST(TBDv4, MalformedFile2) { + static const char malformed_file2[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "foobar: \"unsupported key\"\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(malformed_file2, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + ASSERT_EQ( + "malformed file\nTest.tbd:5:9: error: unknown key 'foobar'\nfoobar: " + "\"unsupported key\"\n ^~~~~~~~~~~~~~~~~\n", + errorMessage); +} + +TEST(TBDv4, MalformedFile3) { + static const char tbd_v1_swift_1_1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 1.1\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_v1_swift_1_1, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + EXPECT_EQ("malformed file\nTest.tbd:5:20: error: invalid Swift ABI " + "version.\nswift-abi-version: 1.1\n ^~~\n", + errorMessage); +} + +} // end namespace TBDv4 From ed5d1c12dc4e59990fbb3d1940d3efa8742e7968 Mon Sep 17 00:00:00 2001 From: GN Sync Bot Date: Tue, 8 Oct 2019 15:12:38 +0000 Subject: [PATCH 252/254] gn build: Merge r374058 llvm-svn: 374059 --- llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn index 8405795685bd2..8e51943b7e263 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn @@ -10,5 +10,6 @@ unittest("TextAPITests") { "TextStubV1Tests.cpp", "TextStubV2Tests.cpp", "TextStubV3Tests.cpp", + "TextStubV4Tests.cpp", ] } From 08daf8cf0a554990caa8a559053c12e41af52dd2 Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Tue, 8 Oct 2019 15:20:19 +0000 Subject: [PATCH 253/254] [Attributor] Add helper class to compose two structured deduction. Summary: This patch introduces a generic way to compose two structured deductions. This will be used for composing generic deduction with `MustBeExecutedExplorer` and other existing generic deduction. Reviewers: jdoerfert, sstefan1 Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66645 llvm-svn: 374060 --- llvm/lib/Transforms/IPO/Attributor.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 629be921fb481..3a1562252f552 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -560,6 +560,21 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, S ^= *T; } +/// Helper class to compose two generic deduction +template class F, template class G> +struct AAComposeTwoGenericDeduction + : public F, StateType> { + AAComposeTwoGenericDeduction(const IRPosition &IRP) + : F, StateType>(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return F, StateType>::updateImpl(A) | + G::updateImpl(A); + } +}; + /// Helper class for generic deduction: return value -> returned position. 
template From c382d03ca85d0c93b3e49949006e42314b9d4409 Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Tue, 8 Oct 2019 15:23:14 +0000 Subject: [PATCH 254/254] [clang][ifs] Clang Interface Stubs ToolChain plumbing. Second Landing Attempt: This patch enables end to end support for generating ELF interface stubs directly from clang. Now the following: clang -emit-interface-stubs -o libfoo.so a.cpp b.cpp c.cpp will product an ELF binary with visible symbols populated. Visibility attributes and -fvisibility can be used to control what gets populated. * Adding ToolChain support for clang Driver IFS Merge Phase * Implementing a default InterfaceStubs Merge clang Tool, used by ToolChain * Adds support for the clang Driver to involve llvm-ifs on ifs files. * Adds -emit-merged-ifs flag, to tell llvm-ifs to emit a merged ifs text file instead of the final object format (normally ELF) Differential Revision: https://reviews.llvm.org/D63978 llvm-svn: 374061 --- clang/include/clang/Driver/Action.h | 12 ++++++ clang/include/clang/Driver/Options.td | 3 ++ clang/include/clang/Driver/Phases.h | 3 +- clang/include/clang/Driver/ToolChain.h | 2 + clang/include/clang/Driver/Types.def | 3 +- clang/lib/Driver/Action.cpp | 6 +++ clang/lib/Driver/CMakeLists.txt | 1 + clang/lib/Driver/Driver.cpp | 36 +++++++++++++---- clang/lib/Driver/Phases.cpp | 1 + clang/lib/Driver/ToolChain.cpp | 10 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 25 ++---------- .../lib/Driver/ToolChains/InterfaceStubs.cpp | 37 +++++++++++++++++ clang/lib/Driver/ToolChains/InterfaceStubs.h | 36 +++++++++++++++++ clang/lib/Driver/Types.cpp | 18 ++++++++- clang/lib/Frontend/CompilerInvocation.cpp | 23 ++++++----- clang/test/InterfaceStubs/bad-format.cpp | 21 +++++----- .../class-template-specialization.cpp | 4 +- clang/test/InterfaceStubs/conflict-type.ifs | 16 ++++++++ clang/test/InterfaceStubs/driver-test.c | 11 +++++ clang/test/InterfaceStubs/externstatic.c | 26 +++++------- clang/test/InterfaceStubs/func.ifs | 40 
+++++++++++++++++++ .../function-template-specialization.cpp | 8 ++-- clang/test/InterfaceStubs/inline.c | 33 +++++++-------- .../test/InterfaceStubs/merge-conflict-test.c | 3 ++ clang/test/InterfaceStubs/object-double.c | 5 +++ clang/test/InterfaceStubs/object-float.c | 3 ++ clang/test/InterfaceStubs/object.c | 7 ++++ clang/test/InterfaceStubs/object.cpp | 14 ------- clang/test/InterfaceStubs/object.ifs | 28 +++++++++++++ .../template-namespace-function.cpp | 2 +- clang/test/InterfaceStubs/weak.cpp | 2 +- clang/test/lit.cfg.py | 4 +- 32 files changed, 332 insertions(+), 111 deletions(-) create mode 100644 clang/lib/Driver/ToolChains/InterfaceStubs.cpp create mode 100644 clang/lib/Driver/ToolChains/InterfaceStubs.h create mode 100644 clang/test/InterfaceStubs/conflict-type.ifs create mode 100644 clang/test/InterfaceStubs/driver-test.c create mode 100644 clang/test/InterfaceStubs/func.ifs create mode 100644 clang/test/InterfaceStubs/merge-conflict-test.c create mode 100644 clang/test/InterfaceStubs/object-double.c create mode 100644 clang/test/InterfaceStubs/object-float.c create mode 100644 clang/test/InterfaceStubs/object.c delete mode 100644 clang/test/InterfaceStubs/object.cpp create mode 100644 clang/test/InterfaceStubs/object.ifs diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index c1ff0b1a60230..c6e90b2978354 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -65,6 +65,7 @@ class Action { BackendJobClass, AssembleJobClass, LinkJobClass, + IfsMergeJobClass, LipoJobClass, DsymutilJobClass, VerifyDebugInfoJobClass, @@ -485,6 +486,17 @@ class AssembleJobAction : public JobAction { } }; +class IfsMergeJobAction : public JobAction { + void anchor() override; + +public: + IfsMergeJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == IfsMergeJobClass; + } +}; + class LinkJobAction : public JobAction { void anchor() override; 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8346d149dcc01..ec906fa02b42e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -633,6 +633,9 @@ def emit_llvm : Flag<["-"], "emit-llvm">, Flags<[CC1Option]>, Group; def emit_iterface_stubs : Flag<["-"], "emit-interface-stubs">, Flags<[CC1Option]>, Group, HelpText<"Generate Inteface Stub Files.">; +def emit_merged_ifs : Flag<["-"], "emit-merged-ifs">, + Flags<[CC1Option]>, Group, + HelpText<"Generate Interface Stub Files, emit merged text not binary.">; def iterface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>; def exported__symbols__list : Separate<["-"], "exported_symbols_list">; def e : JoinedOrSeparate<["-"], "e">, Group; diff --git a/clang/include/clang/Driver/Phases.h b/clang/include/clang/Driver/Phases.h index 7199c657848ca..63931c00c8901 100644 --- a/clang/include/clang/Driver/Phases.h +++ b/clang/include/clang/Driver/Phases.h @@ -20,7 +20,8 @@ namespace phases { Compile, Backend, Assemble, - Link + Link, + IfsMerge, }; enum { diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index d6ed87574faac..2ba16ab21a927 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -136,11 +136,13 @@ class ToolChain { mutable std::unique_ptr Clang; mutable std::unique_ptr Assemble; mutable std::unique_ptr Link; + mutable std::unique_ptr IfsMerge; mutable std::unique_ptr OffloadBundler; Tool *getClang() const; Tool *getAssemble() const; Tool *getLink() const; + Tool *getIfsMerge() const; Tool *getClangAs() const; Tool *getOffloadBundler() const; diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index a33598e7b7f02..79e8d109cd97e 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -84,7 +84,8 @@ TYPE("lto-bc", LTO_BC, 
INVALID, "o", phases // Misc. TYPE("ast", AST, INVALID, "ast", phases::Compile, phases::Backend, phases::Assemble, phases::Link) -TYPE("ifs", IFS, INVALID, "ifs", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("ifs", IFS, INVALID, "ifs", phases::IfsMerge) +TYPE("ifs-cpp", IFS_CPP, INVALID, "ifs", phases::Compile, phases::IfsMerge) TYPE("pcm", ModuleFile, INVALID, "pcm", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("plist", Plist, INVALID, "plist", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("rewritten-objc", RewrittenObjC,INVALID, "cpp", phases::Compile, phases::Backend, phases::Assemble, phases::Link) diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index 47b03f6643b84..0187cf981eb56 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -31,6 +31,7 @@ const char *Action::getClassName(ActionClass AC) { case CompileJobClass: return "compiler"; case BackendJobClass: return "backend"; case AssembleJobClass: return "assembler"; + case IfsMergeJobClass: return "interface-stub-merger"; case LinkJobClass: return "linker"; case LipoJobClass: return "lipo"; case DsymutilJobClass: return "dsymutil"; @@ -357,6 +358,11 @@ void AssembleJobAction::anchor() {} AssembleJobAction::AssembleJobAction(Action *Input, types::ID OutputType) : JobAction(AssembleJobClass, Input, OutputType) {} +void IfsMergeJobAction::anchor() {} + +IfsMergeJobAction::IfsMergeJobAction(ActionList &Inputs, types::ID Type) + : JobAction(IfsMergeJobClass, Inputs, Type) {} + void LinkJobAction::anchor() {} LinkJobAction::LinkJobAction(ActionList &Inputs, types::ID Type) diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index d90c0ff436071..64b5d70f42b6f 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -66,6 +66,7 @@ add_clang_library(clangDriver ToolChains/WebAssembly.cpp ToolChains/XCore.cpp ToolChains/PPCLinux.cpp + 
ToolChains/InterfaceStubs.cpp Types.cpp XRayArgs.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 73b258331f798..5b72b4aeecd20 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -274,11 +274,11 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) { FinalPhase = phases::Preprocess; - // --precompile only runs up to precompilation. + // --precompile only runs up to precompilation. } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) { FinalPhase = phases::Precompile; - // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || @@ -286,20 +286,23 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, (PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) || (PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) || (PhaseArg = DAL.getLastArg(options::OPT__migrate)) || - (PhaseArg = DAL.getLastArg(options::OPT_emit_iterface_stubs)) || (PhaseArg = DAL.getLastArg(options::OPT__analyze)) || (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) { FinalPhase = phases::Compile; - // -S only runs up to the backend. + // clang interface stubs + } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_iterface_stubs))) { + FinalPhase = phases::IfsMerge; + + // -S only runs up to the backend. } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { FinalPhase = phases::Backend; - // -c compilation only runs up to the assembler. + // -c compilation only runs up to the assembler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { FinalPhase = phases::Assemble; - // Otherwise do everything. + // Otherwise do everything. 
} else FinalPhase = phases::Link; @@ -3337,6 +3340,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Construct the actions to perform. HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr; ActionList LinkerInputs; + ActionList MergerInputs; for (auto &I : Inputs) { types::ID InputType = I.first; @@ -3374,6 +3378,17 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + // TODO: Consider removing this because the merged may not end up being + // the final Phase in the pipeline. Perhaps the merged could just merge + // and then pass an artifact of some sort to the Link Phase. + // Queue merger inputs. + if (Phase == phases::IfsMerge) { + assert(Phase == PL.back() && "merging must be final compilation step."); + MergerInputs.push_back(Current); + Current = nullptr; + break; + } + // Each precompiled header file after a module file action is a module // header of that same module file, rather than being compiled to a // separate PCH. @@ -3423,6 +3438,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Actions.push_back(LA); } + // Add an interface stubs merge action if necessary. + if (!MergerInputs.empty()) + Actions.push_back( + C.MakeAction(MergerInputs, types::TY_Image)); + // If --print-supported-cpus, -mcpu=? or -mtune=? is specified, build a custom // Compile phase that prints out supported cpu models and quits. 
if (Arg *A = Args.getLastArg(options::OPT_print_supported_cpus)) { @@ -3459,6 +3479,8 @@ Action *Driver::ConstructPhaseAction( switch (Phase) { case phases::Link: llvm_unreachable("link action invalid here."); + case phases::IfsMerge: + llvm_unreachable("ifsmerge action invalid here."); case phases::Preprocess: { types::ID OutputTy; // -M and -MM specify the dependency file name by altering the output type, @@ -3523,7 +3545,7 @@ Action *Driver::ConstructPhaseAction( if (Args.hasArg(options::OPT_verify_pch)) return C.MakeAction(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_emit_iterface_stubs)) - return C.MakeAction(Input, types::TY_IFS); + return C.MakeAction(Input, types::TY_IFS_CPP); return C.MakeAction(Input, types::TY_LLVM_BC); } case phases::Backend: { diff --git a/clang/lib/Driver/Phases.cpp b/clang/lib/Driver/Phases.cpp index 5b776c63f713a..01598c59bd9eb 100644 --- a/clang/lib/Driver/Phases.cpp +++ b/clang/lib/Driver/Phases.cpp @@ -20,6 +20,7 @@ const char *phases::getPhaseName(ID Id) { case Backend: return "backend"; case Assemble: return "assembler"; case Link: return "linker"; + case IfsMerge: return "ifsmerger"; } llvm_unreachable("Invalid phase id."); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 4379da99d07bd..db2497a10856f 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -10,6 +10,7 @@ #include "InputInfo.h" #include "ToolChains/Arch/ARM.h" #include "ToolChains/Clang.h" +#include "ToolChains/InterfaceStubs.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Sanitizers.h" #include "clang/Config/config.h" @@ -279,6 +280,12 @@ Tool *ToolChain::getLink() const { return Link.get(); } +Tool *ToolChain::getIfsMerge() const { + if (!IfsMerge) + IfsMerge.reset(new tools::ifstool::Merger(*this)); + return IfsMerge.get(); +} + Tool *ToolChain::getOffloadBundler() const { if (!OffloadBundler) OffloadBundler.reset(new tools::OffloadBundler(*this)); @@ -290,6 +297,9 @@ Tool 
*ToolChain::getTool(Action::ActionClass AC) const { case Action::AssembleJobClass: return getAssemble(); + case Action::IfsMergeJobClass: + return getIfsMerge(); + case Action::LinkJobClass: return getLink(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 8628741f7dae4..26e8bc60bfa4c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3683,32 +3683,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { CmdArgs.push_back("-emit-llvm-bc"); - } else if (JA.getType() == types::TY_IFS) { + } else if (JA.getType() == types::TY_IFS || + JA.getType() == types::TY_IFS_CPP) { StringRef ArgStr = Args.hasArg(options::OPT_iterface_stub_version_EQ) ? Args.getLastArgValue(options::OPT_iterface_stub_version_EQ) - : ""; - StringRef StubFormat = - llvm::StringSwitch(ArgStr) - .Case("experimental-ifs-v1", "experimental-ifs-v1") - .Default(""); - - if (StubFormat.empty()) { - std::string ErrorMessage = - "Invalid interface stub format: " + ArgStr.str() + - ((ArgStr == "experimental-yaml-elf-v1" || - ArgStr == "experimental-tapi-elf-v1") - ? " is deprecated." 
- : "."); - D.Diag(diag::err_drv_invalid_value) - << "Must specify a valid interface stub format type, ie: " - "-interface-stub-version=experimental-ifs-v1" - << ErrorMessage; - } - + : "experimental-ifs-v1"; CmdArgs.push_back("-emit-interface-stubs"); CmdArgs.push_back( - Args.MakeArgString(Twine("-interface-stub-version=") + StubFormat)); + Args.MakeArgString(Twine("-interface-stub-version=") + ArgStr.str())); } else if (JA.getType() == types::TY_PP_Asm) { CmdArgs.push_back("-S"); } else if (JA.getType() == types::TY_AST) { diff --git a/clang/lib/Driver/ToolChains/InterfaceStubs.cpp b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp new file mode 100644 index 0000000000000..6677843b2c533 --- /dev/null +++ b/clang/lib/Driver/ToolChains/InterfaceStubs.cpp @@ -0,0 +1,37 @@ +//===--- InterfaceStubs.cpp - Base InterfaceStubs Implementations C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InterfaceStubs.h" +#include "CommonArgs.h" +#include "clang/Driver/Compilation.h" + +namespace clang { +namespace driver { +namespace tools { +namespace ifstool { +void Merger::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + const char *LinkingOutput) const { + std::string Merger = getToolChain().GetProgramPath(getShortName()); + llvm::opt::ArgStringList CmdArgs; + CmdArgs.push_back("-action"); + CmdArgs.push_back(Args.getLastArg(options::OPT_emit_merged_ifs) + ? 
"write-ifs" + : "write-bin"); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + for (const auto &Input : Inputs) + CmdArgs.push_back(Input.getFilename()); + C.addCommand(std::make_unique(JA, *this, Args.MakeArgString(Merger), + CmdArgs, Inputs)); +} +} // namespace ifstool +} // namespace tools +} // namespace driver +} // namespace clang diff --git a/clang/lib/Driver/ToolChains/InterfaceStubs.h b/clang/lib/Driver/ToolChains/InterfaceStubs.h new file mode 100644 index 0000000000000..4afa73701a4c5 --- /dev/null +++ b/clang/lib/Driver/ToolChains/InterfaceStubs.h @@ -0,0 +1,36 @@ +//===--- InterfaceStubs.cpp - Base InterfaceStubs Implementations C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_IFS_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_IFS_H + +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" + +namespace clang { +namespace driver { +namespace tools { +namespace ifstool { +class LLVM_LIBRARY_VISIBILITY Merger : public Tool { +public: + Merger(const ToolChain &TC) : Tool("IFS::Merger", "llvm-ifs", TC) {} + + bool hasIntegratedCPP() const override { return false; } + bool isLinkJob() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; +} // end namespace ifstool +} // end namespace tools +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_IFS_H diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index da01e1acfcaa2..6241fac0c85ac 100644 --- a/clang/lib/Driver/Types.cpp +++ 
b/clang/lib/Driver/Types.cpp @@ -269,6 +269,7 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { .Case("lib", TY_Object) .Case("mii", TY_PP_ObjCXX) .Case("obj", TY_Object) + .Case("ifs", TY_IFS) .Case("pch", TY_PCH) .Case("pcm", TY_ModuleFile) .Case("c++m", TY_CXXModule) @@ -319,6 +320,22 @@ void types::getCompilationPhases(const clang::driver::Driver &Driver, llvm::copy_if(PhaseList, std::back_inserter(P), [](phases::ID Phase) { return Phase <= phases::Precompile; }); + // Treat Interface Stubs like its own compilation mode. + else if (DAL.getLastArg(options::OPT_emit_iterface_stubs)) { + llvm::SmallVector IfsModePhaseList; + llvm::SmallVector &PL = PhaseList; + phases::ID LastPhase = phases::IfsMerge; + if (Id != types::TY_IFS) { + if (DAL.hasArg(options::OPT_c)) + LastPhase = phases::Compile; + PL = IfsModePhaseList; + types::getCompilationPhases(types::TY_IFS_CPP, PL); + } + llvm::copy_if(PL, std::back_inserter(P), [&](phases::ID Phase) { + return Phase <= LastPhase; + }); + } + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. else if (DAL.getLastArg(options::OPT_fsyntax_only) || DAL.getLastArg(options::OPT_print_supported_cpus) || @@ -327,7 +344,6 @@ void types::getCompilationPhases(const clang::driver::Driver &Driver, DAL.getLastArg(options::OPT_rewrite_objc) || DAL.getLastArg(options::OPT_rewrite_legacy_objc) || DAL.getLastArg(options::OPT__migrate) || - DAL.getLastArg(options::OPT_emit_iterface_stubs) || DAL.getLastArg(options::OPT__analyze) || DAL.getLastArg(options::OPT_emit_ast)) llvm::copy_if(PhaseList, std::back_inserter(P), diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 61a2c07890fc5..6fc8afcf3c66d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1734,24 +1734,25 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, StringRef ArgStr = Args.hasArg(OPT_iterface_stub_version_EQ) ? 
Args.getLastArgValue(OPT_iterface_stub_version_EQ) - : ""; - llvm::Optional ProgramAction = - llvm::StringSwitch>(ArgStr) - .Case("experimental-ifs-v1", frontend::GenerateInterfaceIfsExpV1) - .Default(llvm::None); - if (!ProgramAction) { + : "experimental-ifs-v1"; + if (ArgStr == "experimental-yaml-elf-v1" || + ArgStr == "experimental-tapi-elf-v1") { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + - ((ArgStr == "experimental-yaml-elf-v1" || - ArgStr == "experimental-tapi-elf-v1") - ? " is deprecated." - : "."); + " is deprecated."; + Diags.Report(diag::err_drv_invalid_value) + << "Must specify a valid interface stub format type, ie: " + "-interface-stub-version=experimental-ifs-v1" + << ErrorMessage; + } else if (ArgStr != "experimental-ifs-v1") { + std::string ErrorMessage = + "Invalid interface stub format: " + ArgStr.str() + "."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " "-interface-stub-version=experimental-ifs-v1" << ErrorMessage; } else { - Opts.ProgramAction = *ProgramAction; + Opts.ProgramAction = frontend::GenerateInterfaceIfsExpV1; } break; } diff --git a/clang/test/InterfaceStubs/bad-format.cpp b/clang/test/InterfaceStubs/bad-format.cpp index f13ea2fc76b31..4d51ac867eb2a 100644 --- a/clang/test/InterfaceStubs/bad-format.cpp +++ b/clang/test/InterfaceStubs/bad-format.cpp @@ -1,28 +1,25 @@ -// REQUIRES: x86-registered-target -// RUN: not %clang -target x86_64-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=bar-format %s 2>&1 | FileCheck %s +// RUN: not %clang -emit-interface-stubs -interface-stub-version=bad-format %s 2>&1 | \ +// RUN: FileCheck %s -// RUN: not %clang -target x86_64-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-tapi-elf-v1 %s 2>&1 | \ +// RUN: not %clang -emit-interface-stubs -interface-stub-version=experimental-tapi-elf-v1 %s 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-TAPI-DEPRECATED 
%s -// RUN: not %clang -target x86_64-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-yaml-elf-v1 %s 2>&1 | \ +// RUN: not %clang -emit-interface-stubs -interface-stub-version=experimental-yaml-elf-v1 %s 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-YAML-DEPRECATED %s -// RUN: not %clang_cc1 -target x86_64-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=bar-format %s 2>&1 | FileCheck %s +// RUN: not %clang -emit-interface-stubs -interface-stub-version=bad-format %s 2>&1 | \ +// RUN: FileCheck %s -// RUN: not %clang_cc1 -target x86_64-linux-gnu -o - -emit-interface-stubs \ +// RUN: not %clang -emit-interface-stubs \ // RUN: -interface-stub-version=experimental-tapi-elf-v1 %s 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-TAPI-DEPRECATED %s -// RUN: not %clang_cc1 -target x86_64-linux-gnu -o - -emit-interface-stubs \ +// RUN: not %clang -emit-interface-stubs \ // RUN: -interface-stub-version=experimental-yaml-elf-v1 %s 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-YAML-DEPRECATED %s // CHECK: error: invalid value -// CHECK: 'Invalid interface stub format: bar-format.' in 'Must specify a +// CHECK: 'Invalid interface stub format: bad-format.' 
in 'Must specify a // CHECK: valid interface stub format type, ie: // CHECK: -interface-stub-version=experimental-ifs-v1' diff --git a/clang/test/InterfaceStubs/class-template-specialization.cpp b/clang/test/InterfaceStubs/class-template-specialization.cpp index 29e459390495a..0a68d3f1f117e 100644 --- a/clang/test/InterfaceStubs/class-template-specialization.cpp +++ b/clang/test/InterfaceStubs/class-template-specialization.cpp @@ -1,9 +1,9 @@ // REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 %s | \ // RUN: FileCheck -check-prefix=CHECK-TAPI %s -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 %s | \ // RUN: FileCheck -check-prefix=CHECK-TAPI2 %s // RUN: %clang -target x86_64-unknown-linux-gnu -o - -c %s | \ diff --git a/clang/test/InterfaceStubs/conflict-type.ifs b/clang/test/InterfaceStubs/conflict-type.ifs new file mode 100644 index 0000000000000..aaa04775e3176 --- /dev/null +++ b/clang/test/InterfaceStubs/conflict-type.ifs @@ -0,0 +1,16 @@ +# RUN: not %clang -emit-merged-ifs -emit-interface-stubs -o - %s %S/func.ifs 2>&1 | \ +# RUN: FileCheck %s --check-prefixes=CHECK-IFS + +# Here we are testing to see if two symbols with identical names will fail to +# merge in conflict due to mismatched types. +# CHECK-IFS: error: Interface Stub: Type Mismatch for a. +# CHECK-IFS-NEXT: Filename: +# CHECK-IFS-NEXT: Type Values: Object Func + +--- !experimental-ifs-v1 +IfsVersion: 1.0 +Triple: x86_64-linux-gnu +ObjectFileFormat: ELF +Symbols: + a: { Type: Object, Size: 1 } +... 
diff --git a/clang/test/InterfaceStubs/driver-test.c b/clang/test/InterfaceStubs/driver-test.c new file mode 100644 index 0000000000000..d4e50295411ad --- /dev/null +++ b/clang/test/InterfaceStubs/driver-test.c @@ -0,0 +1,11 @@ +// REQUIRES: x86-registered-target + +// RUN: %clang -target x86_64-unknown-linux-gnu -x c -o %t1.so -emit-interface-stubs %s %S/object.c %S/weak.cpp && \ +// RUN: llvm-nm %t1.so 2>&1 | FileCheck --check-prefix=CHECK-IFS %s + +// CHECK-IFS-DAG: data +// CHECK-IFS-DAG: foo +// CHECK-IFS-DAG: strongFunc +// CHECK-IFS-DAG: weakFunc + +int foo(int bar) { return 42 + 1844; } diff --git a/clang/test/InterfaceStubs/externstatic.c b/clang/test/InterfaceStubs/externstatic.c index bc1aef477bcd5..37d5b7db19a2d 100644 --- a/clang/test/InterfaceStubs/externstatic.c +++ b/clang/test/InterfaceStubs/externstatic.c @@ -1,24 +1,20 @@ -// REQUIRES: x86-registered-target -// RUN: %clang -DSTORAGE="extern" -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DSTORAGE="extern" -o - -emit-interface-stubs -std=c99 -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-EXTERN %s -// RUN: %clang -DSTORAGE="extern" -target x86_64-linux-gnu -O0 -o - -c -std=c99 \ + +// RUN: %clang -DSTORAGE="extern" -O0 -o - -c -std=c99 \ // RUN: -xc %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-EXTERN %s -// RUN: %clang -DSTORAGE="extern" -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DSTORAGE="extern" -o - -emit-interface-stubs -std=c99 -xc %s | \ +// RUN: FileCheck -check-prefix=CHECK-EXTERN2 %s + +// RUN: %clang -DSTORAGE="extern" -O0 -o - -c -std=c99 -xc %s | llvm-nm - 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-EXTERN2 %s -// RUN: %clang -DSTORAGE="extern" -target x86_64-linux-gnu -O0 -o - -c -std=c99 \ -// RUN: -xc %s | llvm-nm - 2>&1 | FileCheck 
-check-prefix=CHECK-EXTERN2 %s -// RUN: %clang -DSTORAGE="static" -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DSTORAGE="static" -o - -emit-interface-stubs -std=c99 -xc %s | \ +// RUN: FileCheck -check-prefix=CHECK-STATIC %s + +// RUN: %clang -DSTORAGE="static" -O0 -o - -c -std=c99 -xc %s | llvm-nm - 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-STATIC %s -// RUN: %clang -DSTORAGE="static" -target x86_64-linux-gnu -O0 -o - -c -std=c99 \ -// RUN: -xc %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-STATIC %s // CHECK-EXTERN-NOT: foo // CHECK-STATIC-NOT: foo diff --git a/clang/test/InterfaceStubs/func.ifs b/clang/test/InterfaceStubs/func.ifs new file mode 100644 index 0000000000000..d115523bfda49 --- /dev/null +++ b/clang/test/InterfaceStubs/func.ifs @@ -0,0 +1,40 @@ +# RUN: %clang -emit-interface-stubs -o - %s %S/object.ifs -emit-merged-ifs | \ +# RUN: FileCheck %s --check-prefixes=CHECK-IFS + +# RUN: %clang -emit-interface-stubs -o - %s %S/object.ifs | llvm-readelf --all | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ELF + +# RUN: %clang -emit-interface-stubs -o - %s %s -emit-merged-ifs | \ +# RUN: FileCheck %s --check-prefixes=CHECK-MERGE-IFS + +# CHECK-IFS: --- !experimental-ifs-v1 +# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS-NEXT: Triple: x86_64-linux-gnu +# CHECK-IFS-NEXT: ObjectFileFormat: ELF +# CHECK-IFS-NEXT: Symbols: +# CHECK-IFS-DAG: a: { Type: Func } +# CHECK-IFS-DAG: b: { Type: Object, Size: 4 } +# CHECK-IFS: ... + +# CHECK-ELF: ELF Header: +# CHECK-ELF: Class: ELF64 +# CHECK-ELF: Type: DYN (Shared object file) +# CHECK-ELF: FUNC GLOBAL DEFAULT 1 a +# CHECK-ELF: OBJECT GLOBAL DEFAULT 1 b + +# Here we are testing to see if two identical symbols will merge. 
+# CHECK-MERGE-IFS: --- !experimental-ifs-v1 +# CHECK-MERGE-IFS-NEXT: IfsVersion: 1.0 +# CHECK-MERGE-IFS-NEXT: Triple: x86_64-linux-gnu +# CHECK-MERGE-IFS-NEXT: ObjectFileFormat: ELF +# CHECK-MERGE-IFS-NEXT: Symbols: +# CHECK-MERGE-IFS-NEXT: a: { Type: Func } +# CHECK-MERGE-IFS-NEXT: ... + +--- !experimental-ifs-v1 +IfsVersion: 1.0 +Triple: x86_64-linux-gnu +ObjectFileFormat: ELF +Symbols: + a: { Type: Func } +... diff --git a/clang/test/InterfaceStubs/function-template-specialization.cpp b/clang/test/InterfaceStubs/function-template-specialization.cpp index f6e5c87e7f442..f6ec64bcd916f 100644 --- a/clang/test/InterfaceStubs/function-template-specialization.cpp +++ b/clang/test/InterfaceStubs/function-template-specialization.cpp @@ -1,13 +1,15 @@ // REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ + +// TODO: Fix the case in llvm-ifs where it crashes on an empty Symbols list. +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -c \ // RUN: -interface-stub-version=experimental-ifs-v1 %s | FileCheck %s -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 \ // RUN: -DUSE_TEMPLATE_FUNCTION=1 %s | \ // RUN: FileCheck -check-prefix=CHECK-USES-TEMPLATE-FUNCTION %s -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 \ // RUN: -DSPECIALIZE_TEMPLATE_FUNCTION=1 %s | \ // RUN: FileCheck -check-prefix=CHECK-SPECIALIZES-TEMPLATE-FUNCTION %s diff --git a/clang/test/InterfaceStubs/inline.c b/clang/test/InterfaceStubs/inline.c index e32b4e100960d..06a58c4c1bea4 100644 --- a/clang/test/InterfaceStubs/inline.c +++ b/clang/test/InterfaceStubs/inline.c @@ -1,37 
+1,32 @@ // REQUIRES: x86-registered-target -// RUN: %clang -DINLINE=inline -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=gnu89 -xc %s | \ +// RUN: %clang -c -DINLINE=inline -target x86_64-unknown-linux-gnu -o - \ +// RUN: -emit-interface-stubs -std=gnu89 -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-GNU %s // RUN: %clang -DINLINE=inline -target x86_64-linux-gnu -O0 -o - -c \ // RUN: -std=gnu89 -xc %s | llvm-nm - | FileCheck -check-prefix=CHECK-GNU %s -// RUN: %clang -DINLINE="__attribute__((always_inline))" \ -// RUN: -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -xc %s | \ +// RUN: %clang -c -DINLINE="__attribute__((always_inline))" \ +// RUN: -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-GNU %s // RUN: %clang -DINLINE="__attribute__((always_inline))" \ // RUN: -target x86_64-linux-gnu -O0 -o - -c -xc %s | \ // RUN: llvm-nm - | FileCheck -check-prefix=CHECK-GNU %s -// RUN: %clang -DINLINE=inline -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DINLINE=inline -target x86_64-unknown-linux-gnu -o - \ +// RUN: -emit-interface-stubs -std=c99 -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-STD %s // RUN: %clang -DINLINE=inline -target x86_64-linux-gnu -O0 -o - -c -std=c99 \ // RUN: -xc %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-STD %s -// RUN: %clang -DINLINE="__attribute__((noinline))" \ -// RUN: -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DINLINE="__attribute__((noinline))" \ +// RUN: -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -std=c99 -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-NOINLINE %s // 
RUN: %clang -DINLINE="__attribute__((noinline))" -target x86_64-linux-gnu \ // RUN: -O0 -o - -c -std=c99 -xc %s | llvm-nm - 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-NOINLINE %s -// RUN: %clang -DINLINE="static" -target x86_64-unknown-linux-gnu -o - \ -// RUN: -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 -std=c99 -xc %s | \ +// RUN: %clang -c -DINLINE="static" -target x86_64-unknown-linux-gnu -o - \ +// RUN: -emit-interface-stubs -std=c99 -xc %s | \ // RUN: FileCheck -check-prefix=CHECK-STATIC %s // RUN: %clang -DINLINE="static" -target x86_64-linux-gnu -O0 -o - -c \ // RUN: -std=c99 -xc %s | llvm-nm - 2>&1 | \ @@ -50,12 +45,12 @@ INLINE int foo() { return var; } -// RUN: %clang -DINLINE=inline -target x86_64-linux-gnu -o - \ -// RUN: -emit-interface-stubs -interface-stub-version=experimental-ifs-v1 \ +// RUN: %clang -c -DINLINE=inline -target x86_64-linux-gnu -o - \ +// RUN: -emit-interface-stubs \ // RUN: -std=gnu89 -xc %s | FileCheck -check-prefix=CHECK-TAPI %s -// RUN: %clang -DINLINE=inline -target x86_64-linux-gnu -o - \ -// RUN: -emit-interface-stubs -interface-stub-version=experimental-ifs-v1 \ +// RUN: %clang -c -DINLINE=inline -target x86_64-linux-gnu -o - \ +// RUN: -emit-interface-stubs \ // RUN: -std=gnu89 -xc %s | FileCheck -check-prefix=CHECK-SYMBOLS %s // RUN: %clang -DINLINE=inline -target x86_64-linux-gnu -o - \ // RUN: -c -std=gnu89 -xc %s | llvm-nm - 2>&1 | \ diff --git a/clang/test/InterfaceStubs/merge-conflict-test.c b/clang/test/InterfaceStubs/merge-conflict-test.c new file mode 100644 index 0000000000000..2d006148ba463 --- /dev/null +++ b/clang/test/InterfaceStubs/merge-conflict-test.c @@ -0,0 +1,3 @@ +// RUN: not %clang -o libfoo.so -emit-interface-stubs %s %S/driver-test.c 2>&1 | FileCheck %s +// CHECK: error: Interface Stub: Type Mismatch +int foo; \ No newline at end of file diff --git a/clang/test/InterfaceStubs/object-double.c b/clang/test/InterfaceStubs/object-double.c new file mode 100644 index 
0000000000000..c6d2b61a4d9e3 --- /dev/null +++ b/clang/test/InterfaceStubs/object-double.c @@ -0,0 +1,5 @@ +// RUN: not %clang -o - -emit-interface-stubs %s %S/object.c 2>&1 | FileCheck %s +// Need to encode more type info or weak vs strong symbol resolution in llvm-ifs +// XFAIL: * +// CHECK: error: Interface Stub: Size Mismatch +float data = 42.0; \ No newline at end of file diff --git a/clang/test/InterfaceStubs/object-float.c b/clang/test/InterfaceStubs/object-float.c new file mode 100644 index 0000000000000..6dd056bb852ec --- /dev/null +++ b/clang/test/InterfaceStubs/object-float.c @@ -0,0 +1,3 @@ +// RUN: not %clang -o - -emit-interface-stubs %s %S/object.c 2>&1 | FileCheck %s +// CHECK: error: Interface Stub: Size Mismatch +double data = 42.0; \ No newline at end of file diff --git a/clang/test/InterfaceStubs/object.c b/clang/test/InterfaceStubs/object.c new file mode 100644 index 0000000000000..b07bea930e56c --- /dev/null +++ b/clang/test/InterfaceStubs/object.c @@ -0,0 +1,7 @@ +// RUN: %clang -c -o - -emit-interface-stubs %s | FileCheck -check-prefix=CHECK-TAPI %s +// RUN: %clang -c -o - -emit-interface-stubs %s | FileCheck -check-prefix=CHECK-SYMBOLS %s +// RUN: %clang -c -o - %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s + +// CHECK-TAPI: data: { Type: Object, Size: 4 } +// CHECK-SYMBOLS: data +int data = 42; diff --git a/clang/test/InterfaceStubs/object.cpp b/clang/test/InterfaceStubs/object.cpp deleted file mode 100644 index 7f11fe6bf3cbc..0000000000000 --- a/clang/test/InterfaceStubs/object.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 %s | \ -// RUN: FileCheck -check-prefix=CHECK-TAPI %s - -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 %s | \ -// RUN: FileCheck -check-prefix=CHECK-SYMBOLS %s -// 
RUN: %clang -target x86_64-unknown-linux-gnu -o - -c %s | llvm-nm - 2>&1 | \ -// RUN: FileCheck -check-prefix=CHECK-SYMBOLS %s - -// CHECK-TAPI: data: { Type: Object, Size: 4 } -// CHECK-SYMBOLS: data -int data = 42; diff --git a/clang/test/InterfaceStubs/object.ifs b/clang/test/InterfaceStubs/object.ifs new file mode 100644 index 0000000000000..7dc1134bac937 --- /dev/null +++ b/clang/test/InterfaceStubs/object.ifs @@ -0,0 +1,28 @@ +# RUN: %clang -emit-interface-stubs -o - -emit-merged-ifs %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-IFS + +# RUN: %clang -emit-interface-stubs -o - %s | llvm-readelf --all | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ELF + +# CHECK-IFS: --- !experimental-ifs-v1 +# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS-NEXT: Triple: x86_64-linux-gnu +# CHECK-IFS-NEXT: ObjectFileFormat: ELF +# CHECK-IFS-NEXT: Symbols: +# CHECK-IFS-NEXT: b: { Type: Object, Size: 4 } +# CHECK-IFS-NEXT: ... + +# CHECK-ELF: ELF Header: +# CHECK-ELF: Class: ELF64 +# CHECK-ELF: Data: 2's complement, little endian +# CHECK-ELF: Type: DYN (Shared object file) +# CHECK-ELF-NOT: FUNC GLOBAL DEFAULT 1 a +# CHECK-ELF: OBJECT GLOBAL DEFAULT 1 b + +--- !experimental-ifs-v1 +IfsVersion: 1.0 +Triple: x86_64-linux-gnu +ObjectFileFormat: ELF +Symbols: + b: { Type: Object, Size: 4 } +... 
diff --git a/clang/test/InterfaceStubs/template-namespace-function.cpp b/clang/test/InterfaceStubs/template-namespace-function.cpp index 4956525b4060f..ad8606757c9e4 100644 --- a/clang/test/InterfaceStubs/template-namespace-function.cpp +++ b/clang/test/InterfaceStubs/template-namespace-function.cpp @@ -1,5 +1,5 @@ // REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-unknown-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 %s | \ // RUN: FileCheck %s diff --git a/clang/test/InterfaceStubs/weak.cpp b/clang/test/InterfaceStubs/weak.cpp index 13b7e5aa29383..e1c2c232a9d8f 100644 --- a/clang/test/InterfaceStubs/weak.cpp +++ b/clang/test/InterfaceStubs/weak.cpp @@ -1,5 +1,5 @@ // REQUIRES: x86-registered-target -// RUN: %clang -target x86_64-linux-gnu -o - -emit-interface-stubs \ +// RUN: %clang -target x86_64-linux-gnu -o - -emit-interface-stubs -emit-merged-ifs \ // RUN: -interface-stub-version=experimental-ifs-v1 %s | \ // RUN: FileCheck %s diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index 9ffe30ec50d52..1ffb6d094d72c 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -26,7 +26,7 @@ # suffixes: A list of file extensions to treat as test files. config.suffixes = ['.c', '.cpp', '.cppm', '.m', '.mm', '.cu', - '.ll', '.cl', '.s', '.S', '.modulemap', '.test', '.rs'] + '.ll', '.cl', '.s', '.S', '.modulemap', '.test', '.rs', '.ifs'] # excludes: A list of directories to exclude from the testsuite. 
The 'Inputs' # subdirectories contain auxiliary inputs for various tests in their parent @@ -61,7 +61,7 @@ tool_dirs = [config.clang_tools_dir, config.llvm_tools_dir] tools = [ - 'c-index-test', 'clang-diff', 'clang-format', 'clang-tblgen', 'opt', + 'c-index-test', 'clang-diff', 'clang-format', 'clang-tblgen', 'opt', 'llvm-ifs', ToolSubst('%clang_extdef_map', command=FindTool( 'clang-extdef-mapping'), unresolved='ignore'), ]