diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 68b4a68d1af984..43b90193406fc9 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -58,6 +58,10 @@ jobs: lit-tests: name: Lit Tests runs-on: ${{ matrix.os }} + container: + image: ${{(startsWith(matrix.os, 'ubuntu') && 'ghcr.io/llvm/ci-ubuntu-22.04:latest') || null}} + volumes: + - /mnt/:/mnt/ strategy: fail-fast: false matrix: @@ -77,6 +81,7 @@ jobs: with: python-version: ${{ inputs.python_version }} - name: Install Ninja + if: runner.os != 'Linux' uses: llvm/actions/install-ninja@main # actions/checkout deletes any existing files in the new git directory, # so this needs to either run before ccache-action or it has to use @@ -108,8 +113,8 @@ jobs: run: | if [ "${{ runner.os }}" == "Linux" ]; then builddir="/mnt/build/" - sudo mkdir -p $builddir - sudo chown `whoami`:`whoami` $builddir + mkdir -p $builddir + extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang" else builddir="$(pwd)"/build fi @@ -123,6 +128,7 @@ jobs: -DLLDB_INCLUDE_TESTS=OFF \ -DCMAKE_C_COMPILER_LAUNCHER=sccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + $extra_cmake_args \ ${{ inputs.extra_cmake_args }} ninja -C "$builddir" '${{ inputs.build_target }}' diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml index f2a831ad3577ad..53da8662b0203a 100644 --- a/.github/workflows/release-tasks.yml +++ b/.github/workflows/release-tasks.yml @@ -28,6 +28,7 @@ jobs: name: Create a New Release runs-on: ubuntu-latest needs: validate-tag + steps: - name: Install Dependencies run: | @@ -40,8 +41,9 @@ jobs: - name: Create Release env: GITHUB_TOKEN: ${{ github.token }} + USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }} run: | - ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} create + ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} --user-token "$USER_TOKEN" create release-documentation: name: Build and Upload Release Documentation needs: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 95d44951ae7ee6..93a67e7a895592 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -171,6 +171,22 @@ AST Dumping Potentially Breaking Changes "qualType": "foo" } +Clang Frontend Potentially Breaking Changes +------------------------------------------- +- Target OS macros extension + A new Clang extension (see :ref:`here `) is enabled for + Darwin (Apple platform) targets. Clang now defines ``TARGET_OS_*`` macros for + these targets, which could break existing code bases with improper checks for + the ``TARGET_OS_`` macros. For example, existing checks might fail to include + the ``TargetConditionals.h`` header from Apple SDKs and therefore leaving the + macros undefined and guarded code unexercised. + + Affected code should be checked to see if it's still intended for the specific + target and fixed accordingly. + + The extension can be turned off by the option ``-fno-define-target-os-macros`` + as a workaround. + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed @@ -304,6 +320,10 @@ Non-comprehensive list of changes in this release * The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1. +* Clang now defines macro ``__LLVM_INSTR_PROFILE_GENERATE`` when compiling with + PGO instrumentation profile generation, and ``__LLVM_INSTR_PROFILE_USE`` when + compiling with PGO profile use. + New Compiler Flags ------------------ @@ -344,6 +364,17 @@ New Compiler Flags attribute the replaceable global new and delete operators behave normally (like other functions) with respect to visibility attributes, pragmas and options (e.g ``--fvisibility=``). +* Full register names can be used when printing assembly via ``-mregnames``. + This option now matches the one used by GCC. + +.. _target_os_detail: + +* ``-fdefine-target-os-macros`` and its complement + ``-fno-define-target-os-macros``. Enables or disables the Clang extension to + provide built-in definitions of a list of ``TARGET_OS_*`` macros based on the + target triple. + + The extension is enabled by default for Darwin (Apple platform) targets. Deprecated Compiler Flags ------------------------- @@ -363,6 +394,7 @@ Modified Compiler Flags * ``-fvisibility-global-new-delete-hidden`` is now a deprecated spelling of ``-fvisibility-global-new-delete=force-hidden`` (``-fvisibility-global-new-delete=`` is new in this release). +* ``-fprofile-update`` is enabled for ``-fprofile-generate``. Removed Compiler Flags ------------------------- @@ -860,6 +892,9 @@ Bug Fixes in This Version Fixes (`#78290 `_) - Fixed assertion failure with deleted overloaded unary operators. Fixes (`#78314 `_) +- The XCOFF object file format does not support aliases to symbols having common + linkage. Clang now diagnoses the use of an alias for a common symbol when + compiling for AIX. - Clang now doesn't produce false-positive warning `-Wconstant-logical-operand` for logical operators in C23. @@ -1261,6 +1296,16 @@ CUDA Support - Clang now supports CUDA SDK up to 12.3 - Added support for sm_90a +PowerPC Support +^^^^^^^^^^^^^^^ + +- Added ``nmmintrin.h`` to intrinsics headers. +- Added ``__builtin_ppc_fence`` as barrier of code motion, and + ``__builtin_ppc_mffsl`` for corresponding instruction. +- Supported ``__attribute__((target("tune=cpu")))``. +- Emit ``float-abi`` module flag on 64-bit ELFv2 PowerPC targets if + ``long double`` type is used in current module. + AIX Support ^^^^^^^^^^^ @@ -1269,6 +1314,10 @@ AIX Support base is encoded as an immediate operand. This access sequence is not used for TLS variables larger than 32KB, and is currently only supported on 64-bit mode. +- Inline assembler supports VSR register in pure digits. +- Enabled ThinLTO support. Requires AIX 7.2 TL5 SP7 or newer, or AIX 7.3 TL2 + or newer. Similar to the LTO support on AIX, ThinLTO is implemented with + the libLTO.so plugin. WebAssembly Support ^^^^^^^^^^^^^^^^^^^ @@ -1332,6 +1381,8 @@ libclang - Exposed arguments of ``clang::annotate``. - ``clang::getCursorKindForDecl`` now recognizes linkage specifications such as ``extern "C"`` and reports them as ``CXCursor_LinkageSpec``. +- Changed the libclang library on AIX to export only the necessary symbols to + prevent issues of resolving to the wrong duplicate symbol. Static Analyzer --------------- @@ -1343,9 +1394,6 @@ New features of static analysis tools, such as the Clang Static Analyzer. `Documentation `__. -- Added support for the ``cleanup`` attribute. - `Documentation `__. - - Support "Deducing this" (P0847R7). (Worked out of the box) (`af4751738db8 `__) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e8d03fc2690235..175bedbfb4d01c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5815,6 +5815,18 @@ def mvis3 : Flag<["-"], "mvis3">, Group; def mno_vis3 : Flag<["-"], "mno-vis3">, Group; def mhard_quad_float : Flag<["-"], "mhard-quad-float">, Group; def msoft_quad_float : Flag<["-"], "msoft-quad-float">, Group; +foreach i = 1 ... 7 in + def ffixed_g#i : Flag<["-"], "ffixed-g"#i>, Group, + HelpText<"Reserve the G"#i#" register (SPARC only)">; +foreach i = 0 ... 5 in + def ffixed_o#i : Flag<["-"], "ffixed-o"#i>, Group, + HelpText<"Reserve the O"#i#" register (SPARC only)">; +foreach i = 0 ... 7 in + def ffixed_l#i : Flag<["-"], "ffixed-l"#i>, Group, + HelpText<"Reserve the L"#i#" register (SPARC only)">; +foreach i = 0 ... 5 in + def ffixed_i#i : Flag<["-"], "ffixed-i"#i>, Group, + HelpText<"Reserve the I"#i#" register (SPARC only)">; } // let Flags = [TargetSpecific] // M68k features flags diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 336b7a5e3d727d..3036f461c1ded1 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1187,6 +1187,8 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const { } const char *const AArch64TargetInfo::GCCRegNames[] = { + // clang-format off + // 32-bit Integer registers "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22", @@ -1223,7 +1225,12 @@ const char *const AArch64TargetInfo::GCCRegNames[] = { // SVE predicate-as-counter registers "pn0", "pn1", "pn2", "pn3", "pn4", "pn5", "pn6", "pn7", "pn8", - "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15" + "pn9", "pn10", "pn11", "pn12", "pn13", "pn14", "pn15", + + // SME registers + "za", "zt0", + + // clang-format on }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 22e583021515e5..ae1a4ba7882627 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -178,4 +178,85 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, else Features.push_back("-hard-quad-float"); } + + if (Args.hasArg(options::OPT_ffixed_g1)) + Features.push_back("+reserve-g1"); + + if (Args.hasArg(options::OPT_ffixed_g2)) + Features.push_back("+reserve-g2"); + + if (Args.hasArg(options::OPT_ffixed_g3)) + Features.push_back("+reserve-g3"); + + if (Args.hasArg(options::OPT_ffixed_g4)) + Features.push_back("+reserve-g4"); + + if (Args.hasArg(options::OPT_ffixed_g5)) + Features.push_back("+reserve-g5"); + + if (Args.hasArg(options::OPT_ffixed_g6)) + Features.push_back("+reserve-g6"); + + if (Args.hasArg(options::OPT_ffixed_g7)) + Features.push_back("+reserve-g7"); + + if (Args.hasArg(options::OPT_ffixed_o0)) + Features.push_back("+reserve-o0"); + + if (Args.hasArg(options::OPT_ffixed_o1)) + Features.push_back("+reserve-o1"); + + if (Args.hasArg(options::OPT_ffixed_o2)) + Features.push_back("+reserve-o2"); + + if (Args.hasArg(options::OPT_ffixed_o3)) + Features.push_back("+reserve-o3"); + + if (Args.hasArg(options::OPT_ffixed_o4)) + Features.push_back("+reserve-o4"); + + if (Args.hasArg(options::OPT_ffixed_o5)) + Features.push_back("+reserve-o5"); + + if (Args.hasArg(options::OPT_ffixed_l0)) + Features.push_back("+reserve-l0"); + + if (Args.hasArg(options::OPT_ffixed_l1)) + Features.push_back("+reserve-l1"); + + if (Args.hasArg(options::OPT_ffixed_l2)) + Features.push_back("+reserve-l2"); + + if (Args.hasArg(options::OPT_ffixed_l3)) + Features.push_back("+reserve-l3"); + + if (Args.hasArg(options::OPT_ffixed_l4)) + Features.push_back("+reserve-l4"); + + if (Args.hasArg(options::OPT_ffixed_l5)) + Features.push_back("+reserve-l5"); + + if (Args.hasArg(options::OPT_ffixed_l6)) + Features.push_back("+reserve-l6"); + + if (Args.hasArg(options::OPT_ffixed_l7)) + Features.push_back("+reserve-l7"); + + if (Args.hasArg(options::OPT_ffixed_i0)) + Features.push_back("+reserve-i0"); + + if (Args.hasArg(options::OPT_ffixed_i1)) + Features.push_back("+reserve-i1"); + + if (Args.hasArg(options::OPT_ffixed_i2)) + Features.push_back("+reserve-i2"); + + if (Args.hasArg(options::OPT_ffixed_i3)) + Features.push_back("+reserve-i3"); + + if (Args.hasArg(options::OPT_ffixed_i4)) + Features.push_back("+reserve-i4"); + + if (Args.hasArg(options::OPT_ffixed_i5)) + Features.push_back("+reserve-i5"); } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index b904e0e56d9eb3..57391979887078 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2515,7 +2515,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseChildBlock(); break; case tok::r_paren: - if (!MightBeStmtExpr && + if (!MightBeStmtExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Prev = LeftParen->Previous; const auto *Next = Tokens->peekNextToken(); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1fadd8039462d4..321c11e55c14e3 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -800,11 +800,12 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { BitsUnpacker EnumDeclBits(Record.readInt()); ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8)); ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8)); + bool ShouldSkipCheckingODR = EnumDeclBits.getNextBit(); ED->setScoped(EnumDeclBits.getNextBit()); ED->setScopedUsingClassTag(EnumDeclBits.getNextBit()); ED->setFixed(EnumDeclBits.getNextBit()); - if (!shouldSkipCheckingODR(ED)) { + if (!ShouldSkipCheckingODR) { ED->setHasODRHash(true); ED->ODRHash = Record.readInt(); } @@ -1073,6 +1074,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3)); FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3)); + bool ShouldSkipCheckingODR = FunctionDeclBits.getNextBit(); FD->setInlineSpecified(FunctionDeclBits.getNextBit()); FD->setImplicitlyInline(FunctionDeclBits.getNextBit()); FD->setHasSkippedBody(FunctionDeclBits.getNextBit()); @@ -1102,7 +1104,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { if (FD->isExplicitlyDefaulted()) FD->setDefaultLoc(readSourceLocation()); - if (!shouldSkipCheckingODR(FD)) { + if (!ShouldSkipCheckingODR) { FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); } @@ -1973,6 +1975,8 @@ void ASTDeclReader::ReadCXXDefinitionData( BitsUnpacker CXXRecordDeclBits = Record.readInt(); + bool ShouldSkipCheckingODR = CXXRecordDeclBits.getNextBit(); + #define FIELD(Name, Width, Merge) \ if (!CXXRecordDeclBits.canGetNextNBits(Width)) \ CXXRecordDeclBits.updateValue(Record.readInt()); \ @@ -1982,7 +1986,7 @@ void ASTDeclReader::ReadCXXDefinitionData( #undef FIELD // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) { // Note: the caller has deserialized the IsLambda bit already. Data.ODRHash = Record.readInt(); Data.HasODRHash = true; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3b79a9238d1af3..73018c1170d8f2 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6010,6 +6010,9 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { BitsPacker DefinitionBits; + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + DefinitionBits.addBit(ShouldSkipCheckingODR); + #define FIELD(Name, Width, Merge) \ if (!DefinitionBits.canWriteNextNBits(Width)) { \ Record->push_back(DefinitionBits); \ @@ -6023,11 +6026,10 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(DefinitionBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) { + if (!ShouldSkipCheckingODR) // getODRHash will compute the ODRHash if it has not been previously // computed. Record->push_back(D->getODRHash()); - } bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index f224075643e998..e73800100e3ccf 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -488,13 +488,15 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { BitsPacker EnumDeclBits; EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + EnumDeclBits.addBit(ShouldSkipCheckingODR); EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { @@ -678,6 +680,8 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { // FIXME: stable encoding FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); + bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D); + FunctionDeclBits.addBit(ShouldSkipCheckingODR); FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->hasSkippedBody()); @@ -704,7 +708,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { Record.AddSourceLocation(D->getDefaultLoc()); // We only perform ODR checks for decls not in GMF. - if (!shouldSkipCheckingODR(D)) + if (!ShouldSkipCheckingODR) Record.push_back(D->getODRHash()); if (D->isDefaulted()) { @@ -2137,12 +2141,13 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, - 27)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, + 28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto, // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted, // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr, // UsesSEHTry, SkippedBody, MultiVersion, LateParsed, - // FriendConstraintRefersToEnclosingTemplate, Linkage + // FriendConstraintRefersToEnclosingTemplate, Linkage, + // ShouldSkipCheckingODR Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LocEnd Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash // This Array slurps the rest of the record. Fortunately we want to encode @@ -2269,7 +2274,7 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IntegerType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getPromotionType - Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 19)); // Enum Decl Bits + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedMembEnum // DC diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c index 75e9a8c46b8769..8ddee560b11da4 100644 --- a/clang/test/CodeGen/aarch64-inline-asm.c +++ b/clang/test/CodeGen/aarch64-inline-asm.c @@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) { // CHECK: [[ARG2:%.+]] = load i64, ptr // CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]]) } + +void test_sme_constraints(){ + asm("movt zt0[3, mul vl], z0" : : : "za"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + + asm("movt zt0[3, mul vl], z0" : : : "zt0"); +// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() +} \ No newline at end of file diff --git a/clang/test/Driver/sparc-fixed-register.c b/clang/test/Driver/sparc-fixed-register.c new file mode 100644 index 00000000000000..24880b9c9d86fd --- /dev/null +++ b/clang/test/Driver/sparc-fixed-register.c @@ -0,0 +1,181 @@ +// RUN: %clang --target=sparc-none-gnu -ffixed-g1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G1 < %t %s +// CHECK-FIXED-G1: "-target-feature" "+reserve-g1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G2 < %t %s +// CHECK-FIXED-G2: "-target-feature" "+reserve-g2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G3 < %t %s +// CHECK-FIXED-G3: "-target-feature" "+reserve-g3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G4 < %t %s +// CHECK-FIXED-G4: "-target-feature" "+reserve-g4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G5 < %t %s +// CHECK-FIXED-G5: "-target-feature" "+reserve-g5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g6 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G6 < %t %s +// CHECK-FIXED-G6: "-target-feature" "+reserve-g6" + +// RUN: %clang --target=sparc-none-gnu -ffixed-g7 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-G7 < %t %s +// CHECK-FIXED-G7: "-target-feature" "+reserve-g7" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O0 < %t %s +// CHECK-FIXED-O0: "-target-feature" "+reserve-o0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O1 < %t %s +// CHECK-FIXED-O1: "-target-feature" "+reserve-o1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O2 < %t %s +// CHECK-FIXED-O2: "-target-feature" "+reserve-o2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O3 < %t %s +// CHECK-FIXED-O3: "-target-feature" "+reserve-o3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O4 < %t %s +// CHECK-FIXED-O4: "-target-feature" "+reserve-o4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-o5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-O5 < %t %s +// CHECK-FIXED-O5: "-target-feature" "+reserve-o5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L0 < %t %s +// CHECK-FIXED-L0: "-target-feature" "+reserve-l0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L1 < %t %s +// CHECK-FIXED-L1: "-target-feature" "+reserve-l1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L2 < %t %s +// CHECK-FIXED-L2: "-target-feature" "+reserve-l2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L3 < %t %s +// CHECK-FIXED-L3: "-target-feature" "+reserve-l3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L4 < %t %s +// CHECK-FIXED-L4: "-target-feature" "+reserve-l4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L5 < %t %s +// CHECK-FIXED-L5: "-target-feature" "+reserve-l5" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l6 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L6 < %t %s +// CHECK-FIXED-L6: "-target-feature" "+reserve-l6" + +// RUN: %clang --target=sparc-none-gnu -ffixed-l7 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-L7 < %t %s +// CHECK-FIXED-L7: "-target-feature" "+reserve-l7" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i0 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I0 < %t %s +// CHECK-FIXED-I0: "-target-feature" "+reserve-i0" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i1 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I1 < %t %s +// CHECK-FIXED-I1: "-target-feature" "+reserve-i1" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i2 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I2 < %t %s +// CHECK-FIXED-I2: "-target-feature" "+reserve-i2" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i3 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I3 < %t %s +// CHECK-FIXED-I3: "-target-feature" "+reserve-i3" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i4 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I4 < %t %s +// CHECK-FIXED-I4: "-target-feature" "+reserve-i4" + +// RUN: %clang --target=sparc-none-gnu -ffixed-i5 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-I5 < %t %s +// CHECK-FIXED-I5: "-target-feature" "+reserve-i5" + +// Test multiple of reserve-* options together. +// RUN: %clang --target=sparc-none-gnu \ +// RUN: -ffixed-g1 \ +// RUN: -ffixed-o2 \ +// RUN: -ffixed-l3 \ +// RUN: -ffixed-i4 \ +// RUN: -### %s 2> %t +// RUN: FileCheck \ +// RUN: --check-prefix=CHECK-FIXED-G1 \ +// RUN: --check-prefix=CHECK-FIXED-O2 \ +// RUN: --check-prefix=CHECK-FIXED-L3 \ +// RUN: --check-prefix=CHECK-FIXED-I4 \ +// RUN: < %t %s + +// Test all reserve-* options together. +// RUN: %clang --target=sparc-none-gnu \ +// RUN: -ffixed-g1 \ +// RUN: -ffixed-g2 \ +// RUN: -ffixed-g3 \ +// RUN: -ffixed-g4 \ +// RUN: -ffixed-g5 \ +// RUN: -ffixed-g6 \ +// RUN: -ffixed-g7 \ +// RUN: -ffixed-o0 \ +// RUN: -ffixed-o1 \ +// RUN: -ffixed-o2 \ +// RUN: -ffixed-o3 \ +// RUN: -ffixed-o4 \ +// RUN: -ffixed-o5 \ +// RUN: -ffixed-l0 \ +// RUN: -ffixed-l1 \ +// RUN: -ffixed-l2 \ +// RUN: -ffixed-l3 \ +// RUN: -ffixed-l4 \ +// RUN: -ffixed-l5 \ +// RUN: -ffixed-l6 \ +// RUN: -ffixed-l7 \ +// RUN: -ffixed-i0 \ +// RUN: -ffixed-i1 \ +// RUN: -ffixed-i2 \ +// RUN: -ffixed-i3 \ +// RUN: -ffixed-i4 \ +// RUN: -ffixed-i5 \ +// RUN: -### %s 2> %t +// RUN: FileCheck \ +// RUN: --check-prefix=CHECK-FIXED-G1 \ +// RUN: --check-prefix=CHECK-FIXED-G2 \ +// RUN: --check-prefix=CHECK-FIXED-G3 \ +// RUN: --check-prefix=CHECK-FIXED-G4 \ +// RUN: --check-prefix=CHECK-FIXED-G5 \ +// RUN: --check-prefix=CHECK-FIXED-G6 \ +// RUN: --check-prefix=CHECK-FIXED-G7 \ +// RUN: --check-prefix=CHECK-FIXED-O0 \ +// RUN: --check-prefix=CHECK-FIXED-O1 \ +// RUN: --check-prefix=CHECK-FIXED-O2 \ +// RUN: --check-prefix=CHECK-FIXED-O3 \ +// RUN: --check-prefix=CHECK-FIXED-O4 \ +// RUN: --check-prefix=CHECK-FIXED-O5 \ +// RUN: --check-prefix=CHECK-FIXED-L0 \ +// RUN: --check-prefix=CHECK-FIXED-L1 \ +// RUN: --check-prefix=CHECK-FIXED-L2 \ +// RUN: --check-prefix=CHECK-FIXED-L3 \ +// RUN: --check-prefix=CHECK-FIXED-L4 \ +// RUN: --check-prefix=CHECK-FIXED-L5 \ +// RUN: --check-prefix=CHECK-FIXED-L6 \ +// RUN: --check-prefix=CHECK-FIXED-L7 \ +// RUN: --check-prefix=CHECK-FIXED-I0 \ +// RUN: --check-prefix=CHECK-FIXED-I1 \ +// RUN: --check-prefix=CHECK-FIXED-I2 \ +// RUN: --check-prefix=CHECK-FIXED-I3 \ +// RUN: --check-prefix=CHECK-FIXED-I4 \ +// RUN: --check-prefix=CHECK-FIXED-I5 \ +// RUN: < %t %s diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a471e36f8d6825..0beba12dda62ae 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -26856,6 +26856,7 @@ TEST_F(FormatTest, RemoveParentheses) { EXPECT_EQ(Style.RemoveParentheses, FormatStyle::RPS_Leave); Style.RemoveParentheses = FormatStyle::RPS_MultipleParentheses; + verifyFormat("#define Foo(...) foo((__VA_ARGS__))", Style); verifyFormat("int x __attribute__((aligned(16))) = 0;", Style); verifyFormat("decltype((foo->bar)) baz;", Style); verifyFormat("class __declspec(dllimport) X {};", @@ -26890,6 +26891,7 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; + verifyFormat("#define Return0 return (0);", Style); verifyFormat("return 0;", "return (0);", Style); verifyFormat("co_return 0;", "co_return ((0));", Style); verifyFormat("return 0;", "return (((0)));", Style); diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt index 10d7de970c643d..e7eebd0cb98239 100644 --- a/clang/unittests/Serialization/CMakeLists.txt +++ b/clang/unittests/Serialization/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(SerializationTests InMemoryModuleCacheTest.cpp ModuleCacheTest.cpp NoCommentsTest.cpp + PreambleInNamedModulesTest.cpp SourceLocationEncodingTest.cpp VarDeclConstantInitTest.cpp ) diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp new file mode 100644 index 00000000000000..d26e1cb633654f --- /dev/null +++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp @@ -0,0 +1,132 @@ +//===- unittests/Serialization/PreambleInNamedModulesTest.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/PrecompiledPreamble.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +class PreambleInNamedModulesTest : public ::testing::Test { + void SetUp() override { + ASSERT_FALSE(sys::fs::createUniqueDirectory("modules-test", TestDir)); + } + + void TearDown() override { sys::fs::remove_directories(TestDir); } + +public: + using PathType = SmallString<256>; + + PathType TestDir; + + void addFile(StringRef Path, StringRef Contents, PathType &AbsPath) { + ASSERT_FALSE(sys::path::is_absolute(Path)); + + AbsPath = TestDir; + sys::path::append(AbsPath, Path); + + ASSERT_FALSE( + sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath))); + + std::error_code EC; + llvm::raw_fd_ostream OS(AbsPath, EC); + ASSERT_FALSE(EC); + OS << Contents; + } + + void addFile(StringRef Path, StringRef Contents) { + PathType UnusedAbsPath; + addFile(Path, Contents, UnusedAbsPath); + } +}; + +// Testing that the use of Preamble in named modules can work basically. +// See https://github.com/llvm/llvm-project/issues/80570 +TEST_F(PreambleInNamedModulesTest, BasicTest) { + addFile("foo.h", R"cpp( +enum class E { + A, + B, + C, + D +}; + )cpp"); + + PathType MainFilePath; + addFile("A.cppm", R"cpp( +module; +#include "foo.h" +export module A; +export using ::E; + )cpp", + MainFilePath); + + IntrusiveRefCntPtr Diags = + CompilerInstance::createDiagnostics(new DiagnosticOptions()); + IntrusiveRefCntPtr VFS = + llvm::vfs::createPhysicalFileSystem(); + + CreateInvocationOptions CIOpts; + CIOpts.Diags = Diags; + CIOpts.VFS = VFS; + + const char *Args[] = {"clang++", "-std=c++20", "-working-directory", + TestDir.c_str(), MainFilePath.c_str()}; + std::shared_ptr Invocation = + createInvocation(Args, CIOpts); + ASSERT_TRUE(Invocation); + + llvm::ErrorOr> ContentsBuffer = + llvm::MemoryBuffer::getFile(MainFilePath, /*IsText=*/true); + EXPECT_TRUE(ContentsBuffer); + std::unique_ptr Buffer = std::move(*ContentsBuffer); + + PreambleBounds Bounds = + ComputePreambleBounds(Invocation->getLangOpts(), *Buffer, 0); + + PreambleCallbacks Callbacks; + llvm::ErrorOr BuiltPreamble = PrecompiledPreamble::Build( + *Invocation, Buffer.get(), Bounds, *Diags, VFS, + std::make_shared(), + /*StoreInMemory=*/false, /*StoragePath=*/TestDir, Callbacks); + + ASSERT_FALSE(Diags->hasErrorOccurred()); + + EXPECT_TRUE(BuiltPreamble); + EXPECT_TRUE(BuiltPreamble->CanReuse(*Invocation, *Buffer, Bounds, *VFS)); + BuiltPreamble->OverridePreamble(*Invocation, VFS, Buffer.get()); + + auto Clang = std::make_unique( + std::make_shared()); + Clang->setInvocation(std::move(Invocation)); + Clang->setDiagnostics(Diags.get()); + + if (auto VFSWithRemapping = createVFSFromCompilerInvocation( + Clang->getInvocation(), Clang->getDiagnostics(), VFS)) + VFS = VFSWithRemapping; + + Clang->createFileManager(VFS); + EXPECT_TRUE(Clang->createTarget()); + + Buffer.release(); + + SyntaxOnlyAction Action; + EXPECT_TRUE(Clang->ExecuteAction(Action)); + EXPECT_FALSE(Clang->getDiagnosticsPtr()->hasErrorOccurred()); +} + +} // namespace diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 867ae73f0d3b27..f3b457d786e6bd 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -677,6 +677,7 @@ static void initializeProfileForContinuousMode(void) { PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not" "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n", (uint64_t)CurrentFileOffset, PageSize); + fclose(File); return; } if (writeProfileWithFileObject(Filename, File) != 0) { @@ -692,6 +693,8 @@ static void initializeProfileForContinuousMode(void) { if (doMerging()) { lprofUnlockFileHandle(File); + } + if (File != NULL) { fclose(File); } } diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c index 9f46a98d78ac4e..002bec164d7e85 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c @@ -195,6 +195,8 @@ static const int dummy_name[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_NAME_SECT_NAME); static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); +static int dummy_orderfile[0] COMPILER_RT_SECTION( + COMPILER_RT_SEG INSTR_PROF_ORDERFILE_SECT_NAME); // To avoid GC'ing of the dummy variables by the linker, reference them in an // array and reference the array in the runtime registration code @@ -206,7 +208,7 @@ static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_VISIBILITY void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_bits, (void *)&dummy_data, (void *)&dummy_name, - (void *)&dummy_vnds}; + (void *)&dummy_vnds, (void *)&dummy_orderfile}; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/compiler-rt/test/profile/AIX/bexpfull-pgo.c b/compiler-rt/test/profile/AIX/bexpfull-pgo.c new file mode 100644 index 00000000000000..f48242ec6bfeaa --- /dev/null +++ b/compiler-rt/test/profile/AIX/bexpfull-pgo.c @@ -0,0 +1,7 @@ +// RUN: %clang_pgogen %s -bexpall +// RUN: %clang_pgogen %s -bexpfull + +#include +int ar[10]; +int n; +int main() { memcpy(ar, ar + 1, n); }; diff --git a/libcxx/docs/Modules.rst b/libcxx/docs/Modules.rst index 533c3fbd2a1eea..ee2b81d3b9e7ca 100644 --- a/libcxx/docs/Modules.rst +++ b/libcxx/docs/Modules.rst @@ -218,9 +218,13 @@ Building this project is done with the following steps, assuming the files $ mkdir build $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER= -DLIBCXX_BUILD= + $ ninja -j1 std -C build $ ninja -C build $ build/main +.. note:: The ``std`` dependencies of ``std.compat`` is not always resolved when + building the ``std`` target using multiple jobs. + .. warning:: ```` should point point to the real binary and not to a symlink. diff --git a/libcxx/include/print b/libcxx/include/print index 7f2b5bac3dcf61..543a540ee4f27d 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -32,6 +32,7 @@ namespace std { */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__availability> #include <__concepts/same_as.h> #include <__config> #include <__system_error/system_error.h> @@ -43,10 +44,6 @@ namespace std { #include #include -#if __has_include() -# include -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -68,7 +65,8 @@ _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream); // Note the function is only implemented on the Windows platform. _LIBCPP_EXPORTED_FROM_ABI void __write_to_windows_console(FILE* __stream, wstring_view __view); # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS - +#elif __has_include() +_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream); #endif // _LIBCPP_WIN32API #if _LIBCPP_STD_VER >= 23 @@ -195,15 +193,17 @@ inline constexpr bool __use_unicode_execution_charset = _MSVC_EXECUTION_CHARACTE inline constexpr bool __use_unicode_execution_charset = true; # endif -_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal(FILE* __stream) { +_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal([[maybe_unused]] FILE* __stream) { // The macro _LIBCPP_TESTING_PRINT_IS_TERMINAL is used to change // the behavior in the test. This is not part of the public API. # ifdef _LIBCPP_TESTING_PRINT_IS_TERMINAL return _LIBCPP_TESTING_PRINT_IS_TERMINAL(__stream); +# elif _LIBCPP_AVAILABILITY_HAS_PRINT == 0 + return false; # elif defined(_LIBCPP_WIN32API) return std::__is_windows_terminal(__stream); # elif __has_include() - return isatty(fileno(__stream)); + return std::__is_posix_terminal(__stream); # else # error "Provide a way to determine whether a FILE* is a terminal" # endif diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT index 1179c253f18c8f..7ff604959f4d5c 100644 --- a/libcxx/lib/abi/CHANGELOG.TXT +++ b/libcxx/lib/abi/CHANGELOG.TXT @@ -16,6 +16,14 @@ New entries should be added directly below the "Version" header. Version 18.0 ------------ +* [libc++] Moves is_terminal to the dylib + + The patch moves the POSIX implementation of is_terminal to the dylib. This is + needed to avoid using in public headers. + + All platforms + Symbol added: _ZNSt6__ndk119__is_posix_terminalEP7__sFILE + * [libc++abi] Implement __cxa_init_primary_exception and use it to optimize std::make_exception_ptr (#65534) This patch implements __cxa_init_primary_exception, an extension to the Itanium C++ ABI. diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index c2fea4d8adb420..2064f45bf8c084 100644 --- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1495,6 +1495,7 @@ {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index a60f099b532052..fec3a4505a0c6d 100644 --- a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1176,6 +1176,7 @@ {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index a159ff52218667..e52cf98dd4c4f1 100644 --- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -534,6 +534,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index 5749a7520f9bac..52a04706ddf20b 100644 --- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -534,6 +534,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index e827114f169197..bced6b2ea81ba5 100644 --- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1495,6 +1495,7 @@ {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist index f4077adc074e0a..efa2189e9c9287 100644 --- a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1176,6 +1176,7 @@ {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist index e3d3fcb35d8403..ebda5b0dfba57d 100644 --- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1190,6 +1190,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist index 16923301d2548e..6432ad3be35859 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1188,6 +1188,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist index 2380ffb100de97..1fe84e17b3f7f0 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist @@ -1159,6 +1159,7 @@ {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'} diff --git a/libcxx/modules/CMakeLists.txt b/libcxx/modules/CMakeLists.txt index 0388c048dacb8b..0dea8cfca94ac3 100644 --- a/libcxx/modules/CMakeLists.txt +++ b/libcxx/modules/CMakeLists.txt @@ -137,6 +137,25 @@ set(LIBCXX_MODULE_STD_COMPAT_SOURCES std.compat/cwctype.inc ) +# TODO MODULES the CMakeLists.txt in the build directory is only temporary. +# This allows using as available in the build directory. Once build systems +# have proper support for the installed files this will be removed. +if ("${LIBCXX_GENERATED_INCLUDE_DIR}" STREQUAL "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}") + # This typically happens when the target is not installed. + set(LIBCXX_CONFIGURED_INCLUDE_DIRS "${LIBCXX_GENERATED_INCLUDE_DIR}") +else() + # It's important that the arch directory be included first so that its header files + # which interpose on the default include dir be included instead of the default ones. + set(LIBCXX_CONFIGURED_INCLUDE_DIRS + "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR};${LIBCXX_GENERATED_INCLUDE_DIR}" + ) +endif() +configure_file( + "CMakeLists.txt.in" + "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt" + @ONLY +) + set(LIBCXX_MODULE_STD_INCLUDE_SOURCES) foreach(file ${LIBCXX_MODULE_STD_SOURCES}) set( @@ -166,6 +185,7 @@ configure_file( ) set(_all_modules) +list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.cppm") list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.compat.cppm") foreach(file ${LIBCXX_MODULE_STD_SOURCES} ${LIBCXX_MODULE_STD_COMPAT_SOURCES}) diff --git a/libcxx/modules/CMakeLists.txt.in b/libcxx/modules/CMakeLists.txt.in new file mode 100644 index 00000000000000..e332d70cc16333 --- /dev/null +++ b/libcxx/modules/CMakeLists.txt.in @@ -0,0 +1,88 @@ +cmake_minimum_required(VERSION 3.26) + +project(libc++-modules LANGUAGES CXX) + +# Enable CMake's module support +if(CMAKE_VERSION VERSION_LESS "3.28.0") + if(CMAKE_VERSION VERSION_LESS "3.27.0") + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "2182bf5c-ef0d-489a-91da-49dbc3090d2a") + else() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "aa1f7df0-828a-4fcd-9afc-2dc80491aca7") + endif() + set(CMAKE_EXPERIMENTAL_CXX_MODULE_DYNDEP 1) +else() + cmake_policy(VERSION 3.28) +endif() + +# Default to C++ extensions being off. Libc++'s modules support have trouble +# with extensions right now. +set(CMAKE_CXX_EXTENSIONS OFF) + +# Propagates the CMake options to the modules. +# +# This uses the std module hard-coded since the std.compat module does not +# depend on these flags. +macro(compile_define_if_not condition def) + if (NOT ${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() +macro(compile_define_if condition def) + if (${condition}) + target_compile_definitions(std PRIVATE ${def}) + endif() +endmacro() + +### STD + +add_library(std) +target_sources(std + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.cppm +) + +target_include_directories(std SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std PUBLIC -fno-exceptions) +endif() + +target_compile_options(std + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std + PROPERTIES + OUTPUT_NAME "c++std" +) + +### STD.COMPAT + +add_library(std.compat) +target_sources(std.compat + PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES + std.compat.cppm +) + +target_include_directories(std.compat SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@) + +if (NOT @LIBCXX_ENABLE_EXCEPTIONS@) + target_compile_options(std.compat PUBLIC -fno-exceptions) +endif() + +target_compile_options(std.compat + PUBLIC + -nostdinc++ + -Wno-reserved-module-identifier + -Wno-reserved-user-defined-literal + -fmodule-file=std=${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/std.dir/std.pcm + @LIBCXX_COMPILE_FLAGS@ +) +set_target_properties(std.compat + PROPERTIES + OUTPUT_NAME "c++std.compat" +) +add_dependencies(std.compat std) diff --git a/libcxx/src/print.cpp b/libcxx/src/print.cpp index 3692187a5954a3..8fa59fdd097bcd 100644 --- a/libcxx/src/print.cpp +++ b/libcxx/src/print.cpp @@ -8,22 +8,26 @@ #include <__config> -#if defined(_LIBCPP_WIN32API) +#include +#include + +#include <__system_error/system_error.h> -# include -# include +#include "filesystem/error.h" +#if defined(_LIBCPP_WIN32API) # define WIN32_LEAN_AND_MEAN # define NOMINMAX # include # include - -# include <__system_error/system_error.h> - -# include "filesystem/error.h" +#elif __has_include() +# include +#endif _LIBCPP_BEGIN_NAMESPACE_STD +#if defined(_LIBCPP_WIN32API) + _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream) { // Note the Standard does this in one call, but it's unclear whether // an invalid handle is allowed when calling GetConsoleMode. @@ -52,6 +56,9 @@ __write_to_windows_console([[maybe_unused]] FILE* __stream, [[maybe_unused]] wst } # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_END_NAMESPACE_STD +#elif __has_include() // !_LIBCPP_WIN32API -#endif // !_LIBCPP_WIN32API +_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream) { return isatty(fileno(__stream)); } +#endif + +_LIBCPP_END_NAMESPACE_STD diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index ab2ec5b447d000..05fd38fb753fda 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -36,6 +36,8 @@ class LoongArch final : public TargetInfo { bool usesOnlyLowPageBits(RelType type) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + bool relaxOnce(int pass) const override; + void finalizeRelax(int passes) const override; }; } // end anonymous namespace @@ -465,8 +467,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_TLS_GD_HI20: return R_TLSGD_GOT; case R_LARCH_RELAX: - // LoongArch linker relaxation is not implemented yet. - return R_NONE; + return config->relax ? R_RELAX_HINT : R_NONE; + case R_LARCH_ALIGN: + return R_RELAX_HINT; // Other known relocs that are explicitly unimplemented: // @@ -659,6 +662,155 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, } } +static bool relax(InputSection &sec) { + const uint64_t secAddr = sec.getVA(); + const MutableArrayRef relocs = sec.relocs(); + auto &aux = *sec.relaxAux; + bool changed = false; + ArrayRef sa = ArrayRef(aux.anchors); + uint64_t delta = 0; + + std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE); + aux.writes.clear(); + for (auto [i, r] : llvm::enumerate(relocs)) { + const uint64_t loc = secAddr + r.offset - delta; + uint32_t &cur = aux.relocDeltas[i], remove = 0; + switch (r.type) { + case R_LARCH_ALIGN: { + const uint64_t addend = + r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend; + const uint64_t allBytes = (1 << (addend & 0xff)) - 4; + const uint64_t align = 1 << (addend & 0xff); + const uint64_t maxBytes = addend >> 8; + const uint64_t off = loc & (align - 1); + const uint64_t curBytes = off == 0 ? 0 : align - off; + // All bytes beyond the alignment boundary should be removed. + // If emit bytes more than max bytes to emit, remove all. + if (maxBytes != 0 && curBytes > maxBytes) + remove = allBytes; + else + remove = allBytes - curBytes; + // If we can't satisfy this alignment, we've found a bad input. + if (LLVM_UNLIKELY(static_cast(remove) < 0)) { + errorOrWarn(getErrorLocation((const uint8_t *)loc) + + "insufficient padding bytes for " + lld::toString(r.type) + + ": " + Twine(allBytes) + " bytes available for " + + "requested alignment of " + Twine(align) + " bytes"); + remove = 0; + } + break; + } + } + + // For all anchors whose offsets are <= r.offset, they are preceded by + // the previous relocation whose `relocDeltas` value equals `delta`. + // Decrease their st_value and update their st_size. + for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) { + if (sa[0].end) + sa[0].d->size = sa[0].offset - delta - sa[0].d->value; + else + sa[0].d->value = sa[0].offset - delta; + } + delta += remove; + if (delta != cur) { + cur = delta; + changed = true; + } + } + + for (const SymbolAnchor &a : sa) { + if (a.end) + a.d->size = a.offset - delta - a.d->value; + else + a.d->value = a.offset - delta; + } + // Inform assignAddresses that the size has changed. + if (!isUInt<32>(delta)) + fatal("section size decrease is too large: " + Twine(delta)); + sec.bytesDropped = delta; + return changed; +} + +// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in +// the absence of a linker script. For call and load/store R_LARCH_RELAX, code +// shrinkage may reduce displacement and make more relocations eligible for +// relaxation. Code shrinkage may increase displacement to a call/load/store +// target at a higher fixed address, invalidating an earlier relaxation. Any +// change in section sizes can have cascading effect and require another +// relaxation pass. +bool LoongArch::relaxOnce(int pass) const { + if (config->relocatable) + return false; + + if (pass == 0) + initSymbolAnchors(); + + SmallVector storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) + changed |= relax(*sec); + } + return changed; +} + +void LoongArch::finalizeRelax(int passes) const { + log("relaxation passes: " + Twine(passes)); + SmallVector storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + RelaxAux &aux = *sec->relaxAux; + if (!aux.relocDeltas) + continue; + + MutableArrayRef rels = sec->relocs(); + ArrayRef old = sec->content(); + size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1]; + uint8_t *p = context().bAlloc.Allocate(newSize); + uint64_t offset = 0; + int64_t delta = 0; + sec->content_ = p; + sec->size = newSize; + sec->bytesDropped = 0; + + // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite + // instructions for relaxed relocations. + for (size_t i = 0, e = rels.size(); i != e; ++i) { + uint32_t remove = aux.relocDeltas[i] - delta; + delta = aux.relocDeltas[i]; + if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE) + continue; + + // Copy from last location to the current relocated location. + const Relocation &r = rels[i]; + uint64_t size = r.offset - offset; + memcpy(p, old.data() + offset, size); + p += size; + offset = r.offset + remove; + } + memcpy(p, old.data() + offset, old.size() - offset); + + // Subtract the previous relocDeltas value from the relocation offset. + // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease + // their r_offset by the same delta. + delta = 0; + for (size_t i = 0, e = rels.size(); i != e;) { + uint64_t cur = rels[i].offset; + do { + rels[i].offset -= delta; + if (aux.relocTypes[i] != R_LARCH_NONE) + rels[i].type = aux.relocTypes[i]; + } while (++i != e && rels[i].offset == cur); + delta = aux.relocDeltas[i - 1]; + } + } + } +} + TargetInfo *elf::getLoongArchTargetInfo() { static LoongArch target; return ⌖ diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 8ce92b4badfbd7..5fcab4d39d43a8 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -45,6 +45,7 @@ class RISCV final : public TargetInfo { uint64_t val) const override; void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; bool relaxOnce(int pass) const override; + void finalizeRelax(int passes) const override; }; } // end anonymous namespace @@ -104,26 +105,6 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) { (extractBits(imm, 4, 0) << 7); } -namespace { -struct SymbolAnchor { - uint64_t offset; - Defined *d; - bool end; // true for the anchor of st_value+st_size -}; -} // namespace - -struct elf::RISCVRelaxAux { - // This records symbol start and end offsets which will be adjusted according - // to the nearest relocDeltas element. - SmallVector anchors; - // For relocations[i], the actual offset is - // r_offset - (i ? relocDeltas[i-1] : 0). - std::unique_ptr relocDeltas; - // For relocations[i], the actual type is relocTypes[i]. - std::unique_ptr relocTypes; - SmallVector writes; -}; - RISCV::RISCV() { copyRel = R_RISCV_COPY; pltRel = R_RISCV_JUMP_SLOT; @@ -695,13 +676,13 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { } } -static void initSymbolAnchors() { +void elf::initSymbolAnchors() { SmallVector storage; for (OutputSection *osec : outputSections) { if (!(osec->flags & SHF_EXECINSTR)) continue; for (InputSection *sec : getInputSections(*osec, storage)) { - sec->relaxAux = make(); + sec->relaxAux = make(); if (sec->relocs().size()) { sec->relaxAux->relocDeltas = std::make_unique(sec->relocs().size()); @@ -948,7 +929,7 @@ bool RISCV::relaxOnce(int pass) const { return changed; } -void elf::riscvFinalizeRelax(int passes) { +void RISCV::finalizeRelax(int passes) const { llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); log("relaxation passes: " + Twine(passes)); SmallVector storage; @@ -956,7 +937,7 @@ void elf::riscvFinalizeRelax(int passes) { if (!(osec->flags & SHF_EXECINSTR)) continue; for (InputSection *sec : getInputSections(*osec, storage)) { - RISCVRelaxAux &aux = *sec->relaxAux; + RelaxAux &aux = *sec->relaxAux; if (!aux.relocDeltas) continue; diff --git a/lld/ELF/Arch/SystemZ.cpp b/lld/ELF/Arch/SystemZ.cpp new file mode 100644 index 00000000000000..d37db6877559dc --- /dev/null +++ b/lld/ELF/Arch/SystemZ.cpp @@ -0,0 +1,607 @@ +//===- SystemZ.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSections.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +namespace { +class SystemZ : public TargetInfo { +public: + SystemZ(); + int getTlsGdRelaxSkip(RelType type) const override; + RelExpr getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const override; + RelType getDynRel(RelType type) const override; + void writeGotHeader(uint8_t *buf) const override; + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; + void writePltHeader(uint8_t *buf) const override; + void addPltHeaderSymbols(InputSection &isd) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; + RelExpr adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const override; + bool relaxOnce(int pass) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; + +private: + void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; +}; +} // namespace + +SystemZ::SystemZ() { + copyRel = R_390_COPY; + gotRel = R_390_GLOB_DAT; + pltRel = R_390_JMP_SLOT; + relativeRel = R_390_RELATIVE; + iRelativeRel = R_390_IRELATIVE; + symbolicRel = R_390_64; + tlsGotRel = R_390_TLS_TPOFF; + tlsModuleIndexRel = R_390_TLS_DTPMOD; + tlsOffsetRel = R_390_TLS_DTPOFF; + gotHeaderEntriesNum = 3; + gotPltHeaderEntriesNum = 0; + gotEntrySize = 8; + pltHeaderSize = 32; + pltEntrySize = 32; + ipltEntrySize = 32; + + // This "trap instruction" is used to fill gaps between sections. + // On SystemZ, the behavior of the GNU ld is to fill those gaps + // with nop instructions instead - and unfortunately the default + // glibc crt object files (used to) rely on that behavior since + // they use an alignment on the .init section fragments that causes + // gaps which must be filled with nops as they are being executed. + // Therefore, we provide a nop instruction as "trapInstr" here. + trapInstr = {0x07, 0x07, 0x07, 0x07}; + + defaultImageBase = 0x1000000; +} + +RelExpr SystemZ::getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const { + switch (type) { + case R_390_NONE: + return R_NONE; + // Relocations targeting the symbol value. + case R_390_8: + case R_390_12: + case R_390_16: + case R_390_20: + case R_390_32: + case R_390_64: + return R_ABS; + case R_390_PC16: + case R_390_PC32: + case R_390_PC64: + case R_390_PC12DBL: + case R_390_PC16DBL: + case R_390_PC24DBL: + case R_390_PC32DBL: + return R_PC; + case R_390_GOTOFF16: + case R_390_GOTOFF: // a.k.a. R_390_GOTOFF32 + case R_390_GOTOFF64: + return R_GOTREL; + // Relocations targeting the PLT associated with the symbol. + case R_390_PLT32: + case R_390_PLT64: + case R_390_PLT12DBL: + case R_390_PLT16DBL: + case R_390_PLT24DBL: + case R_390_PLT32DBL: + return R_PLT_PC; + case R_390_PLTOFF16: + case R_390_PLTOFF32: + case R_390_PLTOFF64: + return R_PLT_GOTREL; + // Relocations targeting the GOT entry associated with the symbol. + case R_390_GOTENT: + return R_GOT_PC; + case R_390_GOT12: + case R_390_GOT16: + case R_390_GOT20: + case R_390_GOT32: + case R_390_GOT64: + return R_GOT_OFF; + // Relocations targeting the GOTPLT entry associated with the symbol. + case R_390_GOTPLTENT: + return R_GOTPLT_PC; + case R_390_GOTPLT12: + case R_390_GOTPLT16: + case R_390_GOTPLT20: + case R_390_GOTPLT32: + case R_390_GOTPLT64: + return R_GOTPLT_GOTREL; + // Relocations targeting _GLOBAL_OFFSET_TABLE_. + case R_390_GOTPC: + case R_390_GOTPCDBL: + return R_GOTONLY_PC; + // TLS-related relocations. + case R_390_TLS_LOAD: + return R_NONE; + case R_390_TLS_GDCALL: + return R_TLSGD_PC; + case R_390_TLS_LDCALL: + return R_TLSLD_PC; + case R_390_TLS_GD32: + case R_390_TLS_GD64: + return R_TLSGD_GOT; + case R_390_TLS_LDM32: + case R_390_TLS_LDM64: + return R_TLSLD_GOT; + case R_390_TLS_LDO32: + case R_390_TLS_LDO64: + return R_DTPREL; + case R_390_TLS_LE32: + case R_390_TLS_LE64: + return R_TPREL; + case R_390_TLS_IE32: + case R_390_TLS_IE64: + return R_GOT; + case R_390_TLS_GOTIE12: + case R_390_TLS_GOTIE20: + case R_390_TLS_GOTIE32: + case R_390_TLS_GOTIE64: + return R_GOT_OFF; + case R_390_TLS_IEENT: + return R_GOT_PC; + + default: + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; + } +} + +void SystemZ::writeGotHeader(uint8_t *buf) const { + // _GLOBAL_OFFSET_TABLE_[0] holds the value of _DYNAMIC. + // _GLOBAL_OFFSET_TABLE_[1] and [2] are reserved. + write64be(buf, mainPart->dynamic->getVA()); +} + +void SystemZ::writeGotPlt(uint8_t *buf, const Symbol &s) const { + write64be(buf, s.getPltVA() + 14); +} + +void SystemZ::writeIgotPlt(uint8_t *buf, const Symbol &s) const { + if (config->writeAddends) + write64be(buf, s.getVA()); +} + +void SystemZ::writePltHeader(uint8_t *buf) const { + const uint8_t pltData[] = { + 0xe3, 0x10, 0xf0, 0x38, 0x00, 0x24, // stg %r1,56(%r15) + 0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl %r1,_GLOBAL_OFFSET_TABLE_ + 0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8,%r15),8(%r1) + 0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1,16(%r1) + 0x07, 0xf1, // br %r1 + 0x07, 0x00, // nopr + 0x07, 0x00, // nopr + 0x07, 0x00, // nopr + }; + memcpy(buf, pltData, sizeof(pltData)); + uint64_t got = in.got->getVA(); + uint64_t plt = in.plt->getVA(); + write32be(buf + 8, (got - plt - 6) >> 1); +} + +void SystemZ::addPltHeaderSymbols(InputSection &isec) const { + // The PLT header needs a reference to _GLOBAL_OFFSET_TABLE_, so we + // must ensure the .got section is created even if otherwise unused. + in.got->hasGotOffRel.store(true, std::memory_order_relaxed); +} + +void SystemZ::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + const uint8_t inst[] = { + 0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl %r1,<.got.plt slot> + 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1,0(%r1) + 0x07, 0xf1, // br %r1 + 0x0d, 0x10, // basr %r1,%r0 + 0xe3, 0x10, 0x10, 0x0c, 0x00, 0x14, // lgf %r1,12(%r1) + 0xc0, 0xf4, 0x00, 0x00, 0x00, 0x00, // jg + 0x00, 0x00, 0x00, 0x00, // + }; + memcpy(buf, inst, sizeof(inst)); + + write32be(buf + 2, (sym.getGotPltVA() - pltEntryAddr) >> 1); + write32be(buf + 24, (in.plt->getVA() - pltEntryAddr - 22) >> 1); + write32be(buf + 28, in.relaPlt->entsize * sym.getPltIdx()); +} + +int64_t SystemZ::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + case R_390_8: + return SignExtend64<8>(*buf); + case R_390_16: + case R_390_PC16: + return SignExtend64<16>(read16be(buf)); + case R_390_PC16DBL: + return SignExtend64<16>(read16be(buf)) << 1; + case R_390_32: + case R_390_PC32: + return SignExtend64<32>(read32be(buf)); + case R_390_PC32DBL: + return SignExtend64<32>(read32be(buf)) << 1; + case R_390_64: + case R_390_PC64: + case R_390_TLS_DTPMOD: + case R_390_TLS_DTPOFF: + case R_390_TLS_TPOFF: + case R_390_GLOB_DAT: + case R_390_RELATIVE: + case R_390_IRELATIVE: + return read64be(buf); + case R_390_COPY: + case R_390_JMP_SLOT: + case R_390_NONE: + // These relocations are defined as not having an implicit addend. + return 0; + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + } +} + +RelType SystemZ::getDynRel(RelType type) const { + if (type == R_390_64 || type == R_390_PC64) + return type; + return R_390_NONE; +} + +RelExpr SystemZ::adjustTlsExpr(RelType type, RelExpr expr) const { + if (expr == R_RELAX_TLS_GD_TO_IE) + return R_RELAX_TLS_GD_TO_IE_GOT_OFF; + return expr; +} + +int SystemZ::getTlsGdRelaxSkip(RelType type) const { + // A __tls_get_offset call instruction is marked with 2 relocations: + // + // R_390_TLS_GDCALL / R_390_TLS_LDCALL: marker relocation + // R_390_PLT32DBL: __tls_get_offset + // + // After the relaxation we no longer call __tls_get_offset and should skip + // both relocations to not create a false dependence on __tls_get_offset + // being defined. + // + // Note that this mechanism only works correctly if the R_390_TLS_[GL]DCALL + // is seen immediately *before* the R_390_PLT32DBL. Unfortunately, current + // compilers on the platform will typically generate the inverse sequence. + // To fix this, we sort relocations by offset in RelocationScanner::scan; + // this ensures the correct sequence as the R_390_TLS_[GL]DCALL applies to + // the first byte of the brasl instruction, while the R_390_PLT32DBL applies + // to its third byte (the relative displacement). + + if (type == R_390_TLS_GDCALL || type == R_390_TLS_LDCALL) + return 2; + return 1; +} + +void SystemZ::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The general-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_GDCALL x + // :tls_gdcall:x R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // + // .LC0: + // .quad x@TLSGD R_390_TLS_GD64 x + // + // Relaxing to initial-exec entails: + // 1) Replacing the call by a load from the GOT. + // 2) Replacing the relocation on the constant LC0 by R_390_TLS_GOTIE64. + + switch (rel.type) { + case R_390_TLS_GDCALL: + // brasl %r14,__tls_get_offset@plt -> lg %r2,0(%r2,%r12) + write16be(loc, 0xe322); + write32be(loc + 2, 0xc0000004); + break; + case R_390_TLS_GD64: + relocateNoSym(loc, R_390_TLS_GOTIE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); + } +} + +void SystemZ::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The general-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_GDCALL x + // :tls_gdcall:x R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // + // .LC0: + // .quad x@tlsgd R_390_TLS_GD64 x + // + // Relaxing to local-exec entails: + // 1) Replacing the call by a nop. + // 2) Replacing the relocation on the constant LC0 by R_390_TLS_LE64. + + switch (rel.type) { + case R_390_TLS_GDCALL: + // brasl %r14,__tls_get_offset@plt -> brcl 0,. + write16be(loc, 0xc004); + write32be(loc + 2, 0x00000000); + break; + case R_390_TLS_GD64: + relocateNoSym(loc, R_390_TLS_LE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + } +} + +void SystemZ::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + // The local-dynamic code sequence for a global `x`: + // + // Instruction Relocation Symbol + // ear %rX,%a0 + // sllg %rX,%rX,32 + // ear %rX,%a1 + // larl %r12,_GLOBAL_OFFSET_TABLE_ R_390_GOTPCDBL _GLOBAL_OFFSET_TABLE_ + // lgrl %r2,.LC0 R_390_PC32DBL .LC0 + // brasl %r14,__tls_get_offset@plt R_390_TLS_LDCALL + // :tls_ldcall: R_390_PLT32DBL __tls_get_offset + // la %r2,0(%r2,%rX) + // lgrl %rY,.LC1 R_390_PC32DBL .LC1 + // la %r2,0(%r2,%rY) + // + // .LC0: + // .quad @tlsldm R_390_TLS_LDM64 + // .LC1: + // .quad x@dtpoff R_390_TLS_LDO64 x + // + // Relaxing to local-exec entails: + // 1) Replacing the call by a nop. + // 2) Replacing the constant LC0 by 0 (i.e. ignoring the relocation). + // 3) Replacing the relocation on the constant LC1 by R_390_TLS_LE64. + + switch (rel.type) { + case R_390_TLS_LDCALL: + // brasl %r14,__tls_get_offset@plt -> brcl 0,. + write16be(loc, 0xc004); + write32be(loc + 2, 0x00000000); + break; + case R_390_TLS_LDM64: + break; + case R_390_TLS_LDO64: + relocateNoSym(loc, R_390_TLS_LE64, val); + break; + default: + llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); + } +} + +RelExpr SystemZ::adjustGotPcExpr(RelType type, int64_t addend, + const uint8_t *loc) const { + // Only R_390_GOTENT with addend 2 can be relaxed. + if (!config->relax || addend != 2 || type != R_390_GOTENT) + return R_GOT_PC; + const uint16_t op = read16be(loc - 2); + + // lgrl rx,sym@GOTENT -> larl rx, sym + // This relaxation is legal if "sym" binds locally (which was already + // verified by our caller) and is in-range and properly aligned for a + // LARL instruction. We cannot verify the latter constraint here, so + // we assume it is true and revert the decision later on in relaxOnce + // if necessary. + if ((op & 0xff0f) == 0xc408) + return R_RELAX_GOT_PC; + + return R_GOT_PC; +} + +bool SystemZ::relaxOnce(int pass) const { + // If we decided in adjustGotPcExpr to relax a R_390_GOTENT, + // we need to validate the target symbol is in-range and aligned. + SmallVector storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + for (Relocation &rel : sec->relocs()) { + if (rel.expr != R_RELAX_GOT_PC) + continue; + + uint64_t v = sec->getRelocTargetVA( + sec->file, rel.type, rel.addend, + sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); + if (isInt<33>(v) && !(v & 1)) + continue; + if (rel.sym->auxIdx == 0) { + rel.sym->allocateAux(); + addGotEntry(*rel.sym); + changed = true; + } + rel.expr = R_GOT_PC; + } + } + } + return changed; +} + +void SystemZ::relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + assert(isInt<33>(val) && + "R_390_GOTENT should not have been relaxed if it overflows"); + assert(!(val & 1) && + "R_390_GOTENT should not have been relaxed if it is misaligned"); + const uint16_t op = read16be(loc - 2); + + // lgrl rx,sym@GOTENT -> larl rx, sym + if ((op & 0xff0f) == 0xc408) { + write16be(loc - 2, 0xc000 | (op & 0x00f0)); + write32be(loc, val >> 1); + } +} + +void SystemZ::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.expr) { + case R_RELAX_GOT_PC: + return relaxGot(loc, rel, val); + case R_RELAX_TLS_GD_TO_IE_GOT_OFF: + return relaxTlsGdToIe(loc, rel, val); + case R_RELAX_TLS_GD_TO_LE: + return relaxTlsGdToLe(loc, rel, val); + case R_RELAX_TLS_LD_TO_LE: + return relaxTlsLdToLe(loc, rel, val); + default: + break; + } + switch (rel.type) { + case R_390_8: + checkIntUInt(loc, val, 8, rel); + *loc = val; + break; + case R_390_12: + case R_390_GOT12: + case R_390_GOTPLT12: + case R_390_TLS_GOTIE12: + checkUInt(loc, val, 12, rel); + write16be(loc, (read16be(loc) & 0xF000) | val); + break; + case R_390_PC12DBL: + case R_390_PLT12DBL: + checkInt(loc, val, 13, rel); + checkAlignment(loc, val, 2, rel); + write16be(loc, (read16be(loc) & 0xF000) | ((val >> 1) & 0x0FFF)); + break; + case R_390_16: + case R_390_GOT16: + case R_390_GOTPLT16: + case R_390_GOTOFF16: + case R_390_PLTOFF16: + checkIntUInt(loc, val, 16, rel); + write16be(loc, val); + break; + case R_390_PC16: + checkInt(loc, val, 16, rel); + write16be(loc, val); + break; + case R_390_PC16DBL: + case R_390_PLT16DBL: + checkInt(loc, val, 17, rel); + checkAlignment(loc, val, 2, rel); + write16be(loc, val >> 1); + break; + case R_390_20: + case R_390_GOT20: + case R_390_GOTPLT20: + case R_390_TLS_GOTIE20: + checkInt(loc, val, 20, rel); + write32be(loc, (read32be(loc) & 0xF00000FF) | ((val & 0xFFF) << 16) | + ((val & 0xFF000) >> 4)); + break; + case R_390_PC24DBL: + case R_390_PLT24DBL: + checkInt(loc, val, 25, rel); + checkAlignment(loc, val, 2, rel); + loc[0] = val >> 17; + loc[1] = val >> 9; + loc[2] = val >> 1; + break; + case R_390_32: + case R_390_GOT32: + case R_390_GOTPLT32: + case R_390_GOTOFF: + case R_390_PLTOFF32: + case R_390_TLS_IE32: + case R_390_TLS_GOTIE32: + case R_390_TLS_GD32: + case R_390_TLS_LDM32: + case R_390_TLS_LDO32: + case R_390_TLS_LE32: + checkIntUInt(loc, val, 32, rel); + write32be(loc, val); + break; + case R_390_PC32: + case R_390_PLT32: + checkInt(loc, val, 32, rel); + write32be(loc, val); + break; + case R_390_PC32DBL: + case R_390_PLT32DBL: + case R_390_GOTPCDBL: + case R_390_GOTENT: + case R_390_GOTPLTENT: + case R_390_TLS_IEENT: + checkInt(loc, val, 33, rel); + checkAlignment(loc, val, 2, rel); + write32be(loc, val >> 1); + break; + case R_390_64: + case R_390_PC64: + case R_390_PLT64: + case R_390_GOT64: + case R_390_GOTPLT64: + case R_390_GOTOFF64: + case R_390_PLTOFF64: + case R_390_GOTPC: + case R_390_TLS_IE64: + case R_390_TLS_GOTIE64: + case R_390_TLS_GD64: + case R_390_TLS_LDM64: + case R_390_TLS_LDO64: + case R_390_TLS_LE64: + case R_390_TLS_DTPMOD: + case R_390_TLS_DTPOFF: + case R_390_TLS_TPOFF: + write64be(loc, val); + break; + case R_390_TLS_LOAD: + case R_390_TLS_GDCALL: + case R_390_TLS_LDCALL: + break; + default: + llvm_unreachable("unknown relocation"); + } +} + +TargetInfo *elf::getSystemZTargetInfo() { + static SystemZ t; + return &t; +} diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index 475f7dea1dd7e9..83d816ddb0601e 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -33,6 +33,7 @@ add_lld_library(lldELF Arch/PPC64.cpp Arch/RISCV.cpp Arch/SPARCV9.cpp + Arch/SystemZ.cpp Arch/X86.cpp Arch/X86_64.cpp ARMErrataFix.cpp diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index f4b7d1c9d5b973..8b2c32b1534821 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -200,6 +200,7 @@ static std::tuple parseEmulation(StringRef emul) { .Case("msp430elf", {ELF32LEKind, EM_MSP430}) .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU}) .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Case("elf64_s390", {ELF64BEKind, EM_S390}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) @@ -1137,7 +1138,7 @@ static SmallVector getSymbolOrderingFile(MemoryBufferRef mb) { static bool getIsRela(opt::InputArgList &args) { // The psABI specifies the default relocation entry format. bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH, - EM_PPC, EM_PPC64, EM_RISCV, EM_X86_64}, + EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64}, config->emachine); // If -z rel or -z rela is specified, use the last option. for (auto *arg : args.filtered(OPT_z)) { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index a292e873e72f71..6c7ef27cbd4942 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1614,6 +1614,8 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { return EM_RISCV; case Triple::sparcv9: return EM_SPARCV9; + case Triple::systemz: + return EM_S390; case Triple::x86: return t.isOSIAMCU() ? EM_IAMCU : EM_386; case Triple::x86_64: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 0e0b9783bd88a0..e033a715b59214 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -354,9 +354,10 @@ InputSectionBase *InputSection::getRelocatedSection() const { template void InputSection::copyRelocations(uint8_t *buf) { - if (config->relax && !config->relocatable && config->emachine == EM_RISCV) { - // On RISC-V, relaxation might change relocations: copy from internal ones - // that are updated by relaxation. + if (config->relax && !config->relocatable && + (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) { + // On LoongArch and RISC-V, relaxation might change relocations: copy + // from internal ones that are updated by relaxation. InputSectionBase *sec = getRelocatedSection(); copyRelocations(buf, llvm::make_range(sec->relocations.begin(), sec->relocations.end())); @@ -654,6 +655,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // Variant 2. case EM_HEXAGON: + case EM_S390: case EM_SPARCV9: case EM_386: case EM_X86_64: @@ -716,6 +718,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return sym.getGotVA() + a - p; + case R_GOTPLT_GOTREL: + return sym.getGotPltVA() + a - in.got->getVA(); + case R_GOTPLT_PC: + return sym.getGotPltVA() + a - p; case R_LOONGARCH_GOT_PAGE_PC: if (sym.hasFlag(NEEDS_TLSGD)) return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); @@ -807,6 +813,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return getLoongArchPageDelta(sym.getPltVA() + a, p, type); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); + case R_PLT_GOTREL: + return sym.getPltVA() + a - in.got->getVA(); case R_PPC32_PLTREL: // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30 // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index dda4242d8be1c1..243b28d90bb4c1 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -102,7 +102,23 @@ class SectionBase { link(link), info(info) {} }; -struct RISCVRelaxAux; +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; + +struct RelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector anchors; + // For relocations[i], the actual offset is + // r_offset - (i ? relocDeltas[i-1] : 0). + std::unique_ptr relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. + std::unique_ptr relocTypes; + SmallVector writes; +}; // This corresponds to a section of an input file. class InputSectionBase : public SectionBase { @@ -226,9 +242,9 @@ class InputSectionBase : public SectionBase { // basic blocks. JumpInstrMod *jumpInstrMod = nullptr; - // Auxiliary information for RISC-V linker relaxation. RISC-V does not use - // jumpInstrMod. - RISCVRelaxAux *relaxAux; + // Auxiliary information for RISC-V and LoongArch linker relaxation. + // They do not use jumpInstrMod. + RelaxAux *relaxAux; // The compressed content size when `compressed` is true. size_t compressedSize; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 79c8230724ade7..619fbaf5dc5452 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -203,8 +203,9 @@ static bool isAbsoluteValue(const Symbol &sym) { // Returns true if Expr refers a PLT entry. static bool needsPlt(RelExpr expr) { - return oneof(expr); + return oneof(expr); } bool lld::elf::needsGot(RelExpr expr) { @@ -233,6 +234,8 @@ static RelExpr toPlt(RelExpr expr) { return R_PLT_PC; case R_ABS: return R_PLT; + case R_GOTREL: + return R_PLT_GOTREL; default: return expr; } @@ -253,6 +256,8 @@ static RelExpr fromPlt(RelExpr expr) { return R_ABS; case R_PLT_GOTPLT: return R_GOTPLTREL; + case R_PLT_GOTREL: + return R_GOTREL; default: return expr; } @@ -935,7 +940,7 @@ void elf::addGotEntry(Symbol &sym) { static void addTpOffsetGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); - if (!sym.isPreemptible && !config->isPic) { + if (!sym.isPreemptible && !config->shared) { in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym}); return; } @@ -979,10 +984,10 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (oneof( - e)) + R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, + R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, + R_AARCH64_GOT_PAGE, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, + R_LOONGARCH_GOT_PAGE_PC>(e)) return true; // These never do, except if the entire file is position dependent or if @@ -1374,8 +1379,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) { ctx.hasTlsIe.store(true, std::memory_order_relaxed); // Initial-Exec relocs can be optimized to Local-Exec if the symbol is - // locally defined. - if (execOptimize && isLocalInExecutable) { + // locally defined. This is not supported on SystemZ. + if (execOptimize && isLocalInExecutable && config->emachine != EM_S390) { c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { sym.setFlags(NEEDS_TLSIE); @@ -1534,8 +1539,10 @@ void RelocationScanner::scan(ArrayRef rels) { // For EhInputSection, OffsetGetter expects the relocations to be sorted by // r_offset. In rare cases (.eh_frame pieces are reordered by a linker // script), the relocations may be unordered. + // On SystemZ, all sections need to be sorted by r_offset, to allow TLS + // relaxation to be handled correctly - see SystemZ::getTlsGdRelaxSkip. SmallVector storage; - if (isa(sec)) + if (isa(sec) || config->emachine == EM_S390) rels = sortRels(rels, storage); end = static_cast(rels.end()); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index cfb9092149f3e0..7eb8a811e6934f 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -40,11 +40,14 @@ enum RelExpr { R_GOTPLT, R_GOTPLTREL, R_GOTREL, + R_GOTPLT_GOTREL, + R_GOTPLT_PC, R_NONE, R_PC, R_PLT, R_PLT_PC, R_PLT_GOTPLT, + R_PLT_GOTREL, R_RELAX_HINT, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index dd69916d6b05e8..f0ede1f43bbdb3 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -445,6 +445,7 @@ static std::pair parseBfdName(StringRef s) { .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) + .Case("elf64-s390", {ELF64BEKind, EM_S390}) .Default({ELFNoneKind, EM_NONE}); } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 4b413163314b2e..bada394aa30d7d 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1419,6 +1419,9 @@ DynamicSection::computeContents() { case EM_MIPS: addInSec(DT_MIPS_PLTGOT, *in.gotPlt); break; + case EM_S390: + addInSec(DT_PLTGOT, *in.got); + break; case EM_SPARCV9: addInSec(DT_PLTGOT, *in.plt); break; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 671d22cc66a0e9..b7922425a34e43 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -87,6 +87,8 @@ TargetInfo *elf::getTarget() { return getRISCVTargetInfo(); case EM_SPARCV9: return getSPARCV9TargetInfo(); + case EM_S390: + return getSystemZTargetInfo(); case EM_X86_64: return getX86_64TargetInfo(); } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index ab6b6b9c013ba3..0cefa318135662 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -95,6 +95,8 @@ class TargetInfo { // Do a linker relaxation pass and return true if we changed something. virtual bool relaxOnce(int pass) const { return false; } + // Do finalize relaxation after collecting relaxation infos. + virtual void finalizeRelax(int passes) const {} virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} @@ -186,6 +188,7 @@ TargetInfo *getPPC64TargetInfo(); TargetInfo *getPPCTargetInfo(); TargetInfo *getRISCVTargetInfo(); TargetInfo *getSPARCV9TargetInfo(); +TargetInfo *getSystemZTargetInfo(); TargetInfo *getX86TargetInfo(); TargetInfo *getX86_64TargetInfo(); template TargetInfo *getMipsTargetInfo(); @@ -236,6 +239,7 @@ void addArmSyntheticSectionMappingSymbol(Defined *); void sortArmMappingSymbols(); void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf); void createTaggedSymbols(const SmallVector &files); +void initSymbolAnchors(); LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target; TargetInfo *getTarget(); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 501c10f358497b..6df43a34be013a 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1752,8 +1752,8 @@ template void Writer::finalizeAddressDependentContent() { } } } - if (!config->relocatable && config->emachine == EM_RISCV) - riscvFinalizeRelax(pass); + if (!config->relocatable) + target->finalizeRelax(pass); if (config->relocatable) for (OutputSection *sec : outputSections) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 4752d92e3b1d71..7b16764dd2c7ce 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -448,6 +448,10 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, add("-lto-cs-profile-generate"); if (auto *arg = args.getLastArg(OPT_lto_cs_profile_file)) add("-lto-cs-profile-file:" + StringRef(arg->getValue())); + if (args.hasArg(OPT_plugin_opt_emit_llvm)) + add("-lldemit:llvm"); + if (args.hasArg(OPT_lto_emit_asm)) + add("-lldemit:asm"); if (auto *a = args.getLastArg(OPT_thinlto_cache_dir)) add("-lldltocache:" + StringRef(a->getValue())); diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 02f00f27406c08..9a0a96aac7f1c6 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -158,6 +158,8 @@ def lto_cs_profile_generate: FF<"lto-cs-profile-generate">, HelpText<"Perform context sensitive PGO instrumentation">; def lto_cs_profile_file: JJ<"lto-cs-profile-file=">, HelpText<"Context sensitive profile file path">; +def lto_emit_asm: FF<"lto-emit-asm">, + HelpText<"Emit assembly code">; def thinlto_cache_dir: JJ<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; @@ -181,6 +183,9 @@ def: J<"plugin-opt=cs-profile-path=">, Alias, HelpText<"Alias for --lto-cs-profile-file">; def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">, HelpText<"Directory to store .dwo files when LTO and debug fission are used">; +def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">, + Alias, HelpText<"Alias for --lto-emit-asm">; +def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">; def: J<"plugin-opt=jobs=">, Alias, HelpText<"Alias for --thinlto-jobs=">; def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index fa0e7f2bc0b3ea..56ba3463aeadc0 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -82,9 +82,46 @@ COFF Improvements * Added support for ``--time-trace`` and associated ``--time-trace-granularity``. This generates a .json profile trace of the linker execution. + (`#68236 `_) + +* The ``-dependentloadflag`` option was implemented. + (`#71537 `_) * LLD now prefers library paths specified with ``-libpath:`` over the implicitly detected toolchain paths. + (`#78039 `_) + +* Added new options ``-lldemit:llvm`` and ``-lldemit:asm`` for getting + the output of LTO compilation as LLVM bitcode or assembly. + (`#66964 `_) + (`#67079 `_) + +* Added a new option ``-build-id`` for generating a ``.buildid`` section + when not generating a PDB. A new symbol ``__buildid`` is generated by + the linker, allowing code to reference the build ID of the binary. + (`#71433 `_) + (`#74652 `_) + +* A new, LLD specific option, ``-lld-allow-duplicate-weak``, was added + for allowing duplicate weak symbols. + (`#68077 `_) + +* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport + redirections. + (`#70777 `_) + (`#71376 `_) + (`#72989 `_) + +* Linking undefined references to weak symbols with LTO now works. + (`#70430 `_) + +* Use the ``SOURCE_DATE_EPOCH`` environment variable for the PE header and + debug directory timestamps, if neither the ``/Brepro`` nor ``/timestamp:`` + options have been specified. This makes the linker output reproducible by + setting this environment variable. + (`#81326 `_) + +* Lots of incremental work towards supporting linking ARM64EC binaries. MinGW Improvements ------------------ @@ -92,19 +129,29 @@ MinGW Improvements * Added support for many LTO and ThinLTO options (most LTO options supported by the ELF driver, that are implemented by the COFF backend as well, should be supported now). + (`D158412 `_) + (`D158887 `_) + (`#77387 `_) + (`#81475 `_) * LLD no longer tries to autodetect and use library paths from MSVC/WinSDK installations when run in MinGW mode; that mode of operation shouldn't ever be needed in MinGW mode, and could be a source of unexpected behaviours. + (`D144084 `_) * The ``--icf=safe`` option now works as expected; it was previously a no-op. - -* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport - redirections. + (`#70037 `_) * The strip flags ``-S`` and ``-s`` now can be used to strip out DWARF debug info and symbol tables while emitting a PDB debug info file. + (`#75181 `_) + +* The option ``--dll`` is handled as an alias for the ``--shared`` option. + (`#68575 `_) + +* The option ``--sort-common`` is ignored now. + (`#66336 `_) MachO Improvements ------------------ diff --git a/lld/test/COFF/lto-cache-errors.ll b/lld/test/COFF/lto-cache-errors.ll index 55244e5690dc34..a46190a81b6230 100644 --- a/lld/test/COFF/lto-cache-errors.ll +++ b/lld/test/COFF/lto-cache-errors.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Not supported on windows since we use permissions to deny the creation ; UNSUPPORTED: system-windows diff --git a/lld/test/COFF/thinlto-emit-imports.ll b/lld/test/COFF/thinlto-emit-imports.ll index a9f22c1dc2dcff..b47a6cea4eb7df 100644 --- a/lld/test/COFF/thinlto-emit-imports.ll +++ b/lld/test/COFF/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; Generate summary sections and test lld handling. ; RUN: opt -module-summary %s -o %t1.obj diff --git a/lld/test/COFF/timestamp.test b/lld/test/COFF/timestamp.test index c0658d6109811b..cc73af13c38ca6 100644 --- a/lld/test/COFF/timestamp.test +++ b/lld/test/COFF/timestamp.test @@ -4,19 +4,28 @@ RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.1.exe RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.2.exe RUN: lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.3.exe RUN: env SOURCE_DATE_EPOCH=0 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.4.exe -RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.5.exe -RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe +# Test timestamps corresponding to INT32_TMAX +RUN: lld-link %t.obj /debug /timestamp:2147483647 /entry:main /nodefaultlib /out:%t.5.exe +RUN: env SOURCE_DATE_EPOCH=2147483647 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe +# Test that the command line option /timestamp has precedence over SOURCE_DATE_EPOCH RUN: env SOURCE_DATE_EPOCH=12345 lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.7.exe -RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.8.exe 2>&1 | FileCheck %s --check-prefix=ERROR -RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe 2>&1 | FileCheck %s --check-prefix=ERROR2 +# Test timestamps corresponding to UINT32_TMAX +RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.8.exe +RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe +# Test that setting UINT32_MAX+1 as timestamp fails. +RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.10.exe 2>&1 | FileCheck %s --check-prefix=ERROR +RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.11.exe 2>&1 | FileCheck %s --check-prefix=ERROR2 RUN: llvm-readobj --file-headers --coff-debug-directory %t.1.exe | FileCheck %s --check-prefix=HASH RUN: llvm-readobj --file-headers --coff-debug-directory %t.2.exe | FileCheck %s --check-prefix=HASH RUN: llvm-readobj --file-headers --coff-debug-directory %t.3.exe | FileCheck %s --check-prefix=ZERO RUN: llvm-readobj --file-headers --coff-debug-directory %t.4.exe | FileCheck %s --check-prefix=ZERO -RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=MAX -RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=MAX +RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=LARGE +RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=LARGE RUN: llvm-readobj --file-headers --coff-debug-directory %t.7.exe | FileCheck %s --check-prefix=ZERO +# Not inspecting %t.8.exe and %t.9.exe; llvm-readobj with a 32 bit time_t fails to print dates +# past INT32_MAX correctly. + HASH: ImageFileHeader { HASH: TimeDateStamp: [[STAMP:.*]] HASH: DebugDirectory [ @@ -27,10 +36,10 @@ ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0) ZERO: DebugDirectory [ ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0) -MAX: ImageFileHeader { -MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF) -MAX: DebugDirectory [ -MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF) +LARGE: ImageFileHeader { +LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF) +LARGE: DebugDirectory [ +LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF) ERROR: error: invalid timestamp: 4294967296. Expected 32-bit integer ERROR2: error: invalid SOURCE_DATE_EPOCH timestamp: 4294967296. Expected 32-bit integer diff --git a/lld/test/ELF/Inputs/systemz-init.s b/lld/test/ELF/Inputs/systemz-init.s new file mode 100644 index 00000000000000..1611b69b4419e3 --- /dev/null +++ b/lld/test/ELF/Inputs/systemz-init.s @@ -0,0 +1,5 @@ +// glibc < 2.39 used to align .init and .fini code at a 4-byte boundary. +// This file aims to recreate that behavior. + .section .init,"ax",@progbits + .align 4 + lg %r4, 272(%r15) diff --git a/lld/test/ELF/basic-systemz.s b/lld/test/ELF/basic-systemz.s new file mode 100644 index 00000000000000..f7bb0e8cbd020d --- /dev/null +++ b/lld/test/ELF/basic-systemz.s @@ -0,0 +1,63 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld --hash-style=sysv -discard-all -shared %t.o -o %t.so +# RUN: llvm-readelf --file-header --program-headers --section-headers --dynamic-table %t.so | FileCheck %s + +# Exits with return code 55 on linux. +.text + lghi 2,55 + svc 1 + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: DYN (Shared object file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 +# CHECK-NEXT: Entry point address: 0x0 +# CHECK-NEXT: Start of program headers: 64 (bytes into file) +# CHECK-NEXT: Start of section headers: 768 (bytes into file) +# CHECK-NEXT: Flags: 0x0 +# CHECK-NEXT: Size of this header: 64 (bytes) +# CHECK-NEXT: Size of program headers: 56 (bytes) +# CHECK-NEXT: Number of program headers: 7 +# CHECK-NEXT: Size of section headers: 64 (bytes) +# CHECK-NEXT: Number of section headers: 11 +# CHECK-NEXT: Section header string table index: 9 + +# CHECK: Section Headers: +# CHECK-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .dynsym DYNSYM 00000000000001c8 0001c8 000018 18 A 3 1 8 +# CHECK-NEXT: [ 2] .hash HASH 00000000000001e0 0001e0 000010 04 A 1 0 4 +# CHECK-NEXT: [ 3] .dynstr STRTAB 00000000000001f0 0001f0 000001 00 A 0 0 1 +# CHECK-NEXT: [ 4] .text PROGBITS 00000000000011f4 0001f4 000006 00 AX 0 0 4 +# CHECK-NEXT: [ 5] .dynamic DYNAMIC 0000000000002200 000200 000060 10 WA 3 0 8 +# CHECK-NEXT: [ 6] .relro_padding NOBITS 0000000000002260 000260 000da0 00 WA 0 0 1 +# CHECK-NEXT: [ 7] .comment PROGBITS 0000000000000000 000260 000008 01 MS 0 0 1 +# CHECK-NEXT: [ 8] .symtab SYMTAB 0000000000000000 000268 000030 18 10 2 8 +# CHECK-NEXT: [ 9] .shstrtab STRTAB 0000000000000000 000298 000058 00 0 0 1 +# CHECK-NEXT: [10] .strtab STRTAB 0000000000000000 0002f0 00000a 00 0 0 1 + +# CHECK: Program Headers: +# CHECK-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R 0x8 +# CHECK-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0001f1 0x0001f1 R 0x1000 +# CHECK-NEXT: LOAD 0x0001f4 0x00000000000011f4 0x00000000000011f4 0x000006 0x000006 R E 0x1000 +# CHECK-NEXT: LOAD 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 RW 0x1000 +# CHECK-NEXT: DYNAMIC 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000060 RW 0x8 +# CHECK-NEXT: GNU_RELRO 0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 R 0x1 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x0 + +# CHECK: Dynamic section at offset 0x200 contains 6 entries: +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x0000000000000006 (SYMTAB) 0x1c8 +# CHECK-NEXT: 0x000000000000000b (SYMENT) 24 (bytes) +# CHECK-NEXT: 0x0000000000000005 (STRTAB) 0x1f0 +# CHECK-NEXT: 0x000000000000000a (STRSZ) 1 (bytes) +# CHECK-NEXT: 0x0000000000000004 (HASH) 0x1e0 +# CHECK-NEXT: 0x0000000000000000 (NULL) 0x0 diff --git a/lld/test/ELF/emulation-systemz.s b/lld/test/ELF/emulation-systemz.s new file mode 100644 index 00000000000000..dfdb4620954c8a --- /dev/null +++ b/lld/test/ELF/emulation-systemz.s @@ -0,0 +1,29 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -m elf64_s390 %t.o -o %t1 +# RUN: llvm-readelf --file-header %t1 | FileCheck %s +# RUN: ld.lld %t.o -o %t2 +# RUN: llvm-readelf --file-header %t2 | FileCheck %s +# RUN: echo 'OUTPUT_FORMAT(elf64-s390)' > %t.script +# RUN: ld.lld %t.script %t.o -o %t3 +# RUN: llvm-readelf --file-header %t3 | FileCheck %s + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: EXEC (Executable file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 +# CHECK-NEXT: Entry point address: +# CHECK-NEXT: Start of program headers: 64 (bytes into file) +# CHECK-NEXT: Start of section headers: +# CHECK-NEXT: Flags: 0x0 +# CHECK-NEXT: Size of this header: 64 (bytes) +# CHECK-NEXT: Size of program headers: 56 (bytes) + +.globl _start +_start: diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s new file mode 100644 index 00000000000000..ab61e15d5caca2 --- /dev/null +++ b/lld/test/ELF/loongarch-relax-align.s @@ -0,0 +1,126 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n +# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s + +## Test the R_LARCH_ALIGN without symbol index. +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64 +# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n +# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s +# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s + +## -r keeps section contents unchanged. +# RUN: ld.lld -r %t.64.o -o %t.64.r +# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR + +# CHECK-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start +# CHECK-DAG: {{0*}}10038 l .text {{0*}}0c .L1 +# CHECK-DAG: {{0*}}10040 l .text {{0*}}04 .L2 +# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start + +# CHECK: <.Ltext_start>: +# CHECK-NEXT: break 1 +# CHECK-NEXT: break 2 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: break 3 +# CHECK-NEXT: break 4 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 56 +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 64 +# CHECK-EMPTY: +# CHECK-NEXT: <.L1>: +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: <.L2>: +# CHECK-NEXT: break 5 + +# CHECK: <.Ltext2_start>: +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: break 6 + +# CHECKR: <.Ltext2_start>: +# CHECKR-NEXT: pcalau12i $a0, 0 +# CHECKR-NEXT: {{0*}}00: R_LARCH_PCALA_HI20 .Ltext2_start +# CHECKR-NEXT: {{0*}}00: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: addi.d $a0, $a0, 0 +# CHECKR-NEXT: {{0*}}04: R_LARCH_PCALA_LO12 .Ltext2_start +# CHECKR-NEXT: {{0*}}04: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: nop +# CHECKR-NEXT: {{0*}}08: R_LARCH_ALIGN .Lalign_symbol+0x4 +# CHECKR-NEXT: nop +# CHECKR-NEXT: nop +# CHECKR-NEXT: break 6 + +.macro .fake_p2align_4 max=0 + .ifdef old + .if \max==0 + .reloc ., R_LARCH_ALIGN, 0xc + nop; nop; nop + .endif + .else + .reloc ., R_LARCH_ALIGN, .Lalign_symbol + 0x4 + (\max << 8) + nop; nop; nop + .endif +.endm + + .text +.Lalign_symbol: +.Ltext_start: + break 1 + break 2 +## +0x8: Emit 2 nops, delete 1 nop. + .fake_p2align_4 + + break 3 +## +0x14: Emit 3 nops > 8 bytes, not emit. + .fake_p2align_4 8 + + break 4 + .fake_p2align_4 8 +## +0x18: Emit 2 nops <= 8 bytes. + +## Compensate +.ifdef old + nop; nop +.endif + +## +0x20: Test symbol value and symbol size can be handled. + la.pcrel $a0, .Ltext_start + la.pcrel $a0, .L1 + la.pcrel $a0, .L2 + +## +0x38: Emit 2 nops, delete 1 nop. +.L1: + .fake_p2align_4 +.L2: + break 5 + .size .L1, . - .L1 + .size .L2, . - .L2 + .size .Ltext_start, . - .Ltext_start + +## Test another text section. + .section .text2,"ax",@progbits +.Ltext2_start: + la.pcrel $a0, .Ltext2_start + .fake_p2align_4 + break 6 + .size .Ltext2_start, . - .Ltext2_start diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s new file mode 100644 index 00000000000000..581fce8c95caa4 --- /dev/null +++ b/lld/test/ELF/loongarch-relax-emit-relocs.s @@ -0,0 +1,49 @@ +# REQUIRES: loongarch +## Test that we can handle --emit-relocs while relaxing. + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o +# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32 +# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64 +# RUN: llvm-objdump -dr %t.32 | FileCheck %s +# RUN: llvm-objdump -dr %t.64 | FileCheck %s + +## -r should keep original relocations. +# RUN: ld.lld -r %t.64.o -o %t.64.r +# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR + +## --no-relax should keep original relocations. +## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now. +# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax +# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s + +# CHECK: 00010000 <_start>: +# CHECK-NEXT: pcalau12i $a0, 0 +# CHECK-NEXT: R_LARCH_PCALA_HI20 _start +# CHECK-NEXT: R_LARCH_RELAX *ABS* +# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0 +# CHECK-NEXT: R_LARCH_PCALA_LO12 _start +# CHECK-NEXT: R_LARCH_RELAX *ABS* +# CHECK-NEXT: nop +# CHECK-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 +# CHECK-NEXT: nop +# CHECK-NEXT: ret + +# CHECKR: <_start>: +# CHECKR-NEXT: pcalau12i $a0, 0 +# CHECKR-NEXT: R_LARCH_PCALA_HI20 _start +# CHECKR-NEXT: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: addi.d $a0, $a0, 0 +# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start +# CHECKR-NEXT: R_LARCH_RELAX *ABS* +# CHECKR-NEXT: nop +# CHECKR-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4 +# CHECKR-NEXT: nop +# CHECKR-NEXT: nop +# CHECKR-NEXT: ret + +.global _start +_start: + la.pcrel $a0, _start + .p2align 4 + ret diff --git a/lld/test/ELF/lto/resolution-err.ll b/lld/test/ELF/lto/resolution-err.ll index 6dfa64b1b8b9ee..f9855abaff3279 100644 --- a/lld/test/ELF/lto/resolution-err.ll +++ b/lld/test/ELF/lto/resolution-err.ll @@ -1,5 +1,5 @@ ; UNSUPPORTED: system-windows -; REQUIRES: shell +; REQUIRES: shell, non-root-user ; RUN: llvm-as %s -o %t.bc ; RUN: touch %t.resolution.txt ; RUN: chmod u-w %t.resolution.txt diff --git a/lld/test/ELF/lto/systemz.ll b/lld/test/ELF/lto/systemz.ll new file mode 100644 index 00000000000000..42bf4e32fb6d75 --- /dev/null +++ b/lld/test/ELF/lto/systemz.ll @@ -0,0 +1,18 @@ +; REQUIRES: systemz +;; Test we can infer the e_machine value EM_S390 from a bitcode file. + +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld %t.o -o %t +; RUN: llvm-readobj -h %t | FileCheck %s + +; CHECK: Class: 64-bit +; CHECK: DataEncoding: BigEndian +; CHECK: Machine: EM_S390 + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" +target triple = "s390x-unknown-linux-gnu" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/ELF/lto/thinlto-cant-write-index.ll b/lld/test/ELF/lto/thinlto-cant-write-index.ll index e664acbb17de1a..286fcddd4238a1 100644 --- a/lld/test/ELF/lto/thinlto-cant-write-index.ll +++ b/lld/test/ELF/lto/thinlto-cant-write-index.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; Basic ThinLTO tests. ; RUN: opt -module-summary %s -o %t1.o diff --git a/lld/test/ELF/lto/thinlto-emit-imports.ll b/lld/test/ELF/lto/thinlto-emit-imports.ll index 6d0e1e65047db4..253ec08619c982 100644 --- a/lld/test/ELF/lto/thinlto-emit-imports.ll +++ b/lld/test/ELF/lto/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Test a few properties not tested by thinlto-index-only.ll ; RUN: opt -module-summary %s -o %t1.o diff --git a/lld/test/ELF/systemz-got.s b/lld/test/ELF/systemz-got.s new file mode 100644 index 00000000000000..1d558aa3b02905 --- /dev/null +++ b/lld/test/ELF/systemz-got.s @@ -0,0 +1,16 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/shared.s -o %t2.o +# RUN: ld.lld -shared %t2.o -soname=%t2.so -o %t2.so + +# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o %t2.so -o %t +# RUN: llvm-readelf -S -r %t | FileCheck %s + +# CHECK: .got PROGBITS {{.*}} {{.*}} 000020 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# CHECK: {{.*}} 000000010000000a R_390_GLOB_DAT 0000000000000000 bar + 0 + +.global _start +_start: + lgrl %r1,bar@GOT diff --git a/lld/test/ELF/systemz-gotent-relax-align.s b/lld/test/ELF/systemz-gotent-relax-align.s new file mode 100644 index 00000000000000..c6326086f56db0 --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax-align.s @@ -0,0 +1,48 @@ +# REQUIRES: systemz +## Verify that R_390_GOTENT optimization is not performed on misaligned symbols. + +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-readelf -S -r -x .got -x .got.plt %t1 | FileCheck --check-prefixes=CHECK %s +# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s + +## We retain one .got entry for the unaligned symbol. +# CHECK: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: .got PROGBITS 00000000010021e0 0001e0 000020 00 WA 0 0 8 +# CHECK-NEXT: .relro_padding NOBITS 0000000001002200 000200 000e00 00 WA 0 0 1 +# CHECK-NEXT: .data PROGBITS 0000000001003200 000200 000006 00 WA 0 0 2 + +# CHECK-LABEL: Hex dump of section '.got': +# CHECK-NEXT: 0x010021e0 00000000 00000000 00000000 00000000 +# CHECK-NEXT: 0x010021f0 00000000 00000000 00000000 01003205 + +# DISASM: Disassembly of section .text: +# DISASM: <_start>: +# DISASM-NEXT: larl %r1, 0x1003200 +# DISASM-NEXT: larl %r1, 0x1003200 +# DISASM-NEXT: lgrl %r1, 0x10021f8 +# DISASM-NEXT: lgrl %r1, 0x10021f8 + +.data +.globl var_align +.hidden var_align + .align 2 +var_align: + .long 0 + +.data +.globl var_unalign +.hidden var_unalign + .align 2 + .byte 0 +var_unalign: + .byte 0 + +.text +.globl _start +.type _start, @function +_start: + lgrl %r1, var_align@GOT + lgrl %r1, var_align@GOT + lgrl %r1, var_unalign@GOT + lgrl %r1, var_unalign@GOT diff --git a/lld/test/ELF/systemz-gotent-relax-und-dso.s b/lld/test/ELF/systemz-gotent-relax-und-dso.s new file mode 100644 index 00000000000000..57369a417fd445 --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax-und-dso.s @@ -0,0 +1,68 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %S/Inputs/gotpc-relax-und-dso.s -o %tdso.o +# RUN: ld.lld -shared %tdso.o -soname=t.so -o %t.so +# RUN: ld.lld --hash-style=sysv -shared %t.o %t.so -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=RELOC %s +# RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck --check-prefix=DISASM %s + +# RELOC-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 3 entries: +# RELOC: 00000000000023f8 000000010000000a R_390_GLOB_DAT 00000000000012d8 foo + 0 +# RELOC: 0000000000002400 000000030000000a R_390_GLOB_DAT 0000000000000000 und + 0 +# RELOC: 0000000000002408 000000040000000a R_390_GLOB_DAT 0000000000000000 dsofoo + 0 + +# DISASM: Disassembly of section .text: +# DISASM-EMPTY: +# DISASM-NEXT: : +# DISASM-NEXT: bc 0, 0 +# DISASM: : +# DISASM-NEXT: bc 0, 0 +# DISASM: <_start>: +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2400 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: lgrl %r1, 0x2408 +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: larl %r1, 0x12dc +# DISASM-NEXT: lgrl %r1, 0x23f8 +# DISASM-NEXT: lgrl %r1, 0x23f8 + +.text +.globl foo +.type foo, @function +foo: + nop + +.globl hid +.hidden hid +.type hid, @function +hid: + nop + +.globl _start +.type _start, @function +_start: + lgrl %r1, und@GOT + lgrl %r1, und@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, und@GOT + lgrl %r1, und@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, dsofoo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT diff --git a/lld/test/ELF/systemz-gotent-relax.s b/lld/test/ELF/systemz-gotent-relax.s new file mode 100644 index 00000000000000..f665e1af9e53d2 --- /dev/null +++ b/lld/test/ELF/systemz-gotent-relax.s @@ -0,0 +1,91 @@ +# REQUIRES: systemz +## Test R_390_GOTENT optimization. + +# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t1 --no-apply-dynamic-relocs +# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,NOAPPLY %s +# RUN: ld.lld %t.o -o %t1 --apply-dynamic-relocs +# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,APPLY %s +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s + +## --no-relax disables GOT optimization. +# RUN: ld.lld --no-relax %t.o -o %t2 +# RUN: llvm-objdump --no-print-imm-hex -d %t2 | FileCheck --check-prefix=NORELAX %s + +## In our implementation, .got is retained even if all GOT-generating relocations are optimized. +# CHECK: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: .iplt PROGBITS 0000000001001240 000240 000020 00 AX 0 0 16 +# CHECK-NEXT: .got PROGBITS 0000000001002260 000260 000018 00 WA 0 0 8 +# CHECK-NEXT: .relro_padding NOBITS 0000000001002278 000278 000d88 00 WA 0 0 1 +# CHECK-NEXT: .got.plt PROGBITS 0000000001003278 000278 000008 00 WA 0 0 8 + +## There is one R_S390_IRELATIVE relocation. +# CHECK-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# CHECK: 0000000001003278 000000000000003d R_390_IRELATIVE 10011e8 + +# CHECK-LABEL: Hex dump of section '.got.plt': +# NOAPPLY-NEXT: 0x01003278 00000000 00000000 +# APPLY-NEXT: 0x01003278 00000000 010011e8 + +# DISASM: Disassembly of section .text: +# DISASM: 00000000010011e0 : +# DISASM-NEXT: bc 0, 0 +# DISASM: 00000000010011e4 : +# DISASM-NEXT: bc 0, 0 +# DISASM: 00000000010011e8 : +# DISASM-NEXT: br %r14 +# DISASM: 00000000010011ea <_start>: +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e0 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: larl %r1, 0x10011e4 +# DISASM-NEXT: lgrl %r1, 0x1003278 +# DISASM-NEXT: lgrl %r1, 0x1003278 + +# NORELAX-LABEL: <_start>: +# NORELAX-COUNT-12: lgrl + +.text +.globl foo + +.text +.globl foo +.type foo, @function +foo: + nop + +.globl hid +.hidden hid +.type hid, @function +hid: + nop + +.text +.type ifunc STT_GNU_IFUNC +.globl ifunc +.type ifunc, @function +ifunc: + br %r14 + +.globl _start +.type _start, @function +_start: + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, foo@GOT + lgrl %r1, foo@GOT + lgrl %r1, hid@GOT + lgrl %r1, hid@GOT + lgrl %r1, ifunc@GOT + lgrl %r1, ifunc@GOT diff --git a/lld/test/ELF/systemz-ifunc-nonpreemptible.s b/lld/test/ELF/systemz-ifunc-nonpreemptible.s new file mode 100644 index 00000000000000..5056db302ca1c7 --- /dev/null +++ b/lld/test/ELF/systemz-ifunc-nonpreemptible.s @@ -0,0 +1,75 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-none-linux-gnu %s -o %t.o +# RUN: ld.lld -static %t.o -o %t +# RUN: ld.lld -static %t.o -o %t.apply --apply-dynamic-relocs +# RUN: llvm-readelf --section-headers --relocations --symbols %t | FileCheck %s +# RUN: llvm-readelf -x .got.plt %t | FileCheck %s --check-prefix=NO-APPLY-RELOC +# RUN: llvm-readelf -x .got.plt %t.apply | FileCheck %s --check-prefix=APPLY-RELOC +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t | FileCheck %s --check-prefix=DISASM + +# CHECK: Section Headers: +# CHECK-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .rela.dyn RELA 0000000001000158 000158 000030 18 AI 0 4 8 +# CHECK-NEXT: [ 2] .text PROGBITS 0000000001001188 000188 00001c 00 AX 0 0 4 +# CHECK-NEXT: [ 3] .iplt PROGBITS 00000000010011b0 0001b0 000040 00 AX 0 0 16 +# CHECK-NEXT: [ 4] .got.plt PROGBITS 00000000010021f0 0001f0 000010 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.dyn' at offset 0x158 contains 2 entries: +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: 00000000010021f0 000000000000003d R_390_IRELATIVE 1001188 +# CHECK-NEXT: 00000000010021f8 000000000000003d R_390_IRELATIVE 100118a + +# CHECK: Symbol table '.symtab' contains 6 entries: +# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# CHECK-NEXT: 1: 0000000001000158 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_start +# CHECK-NEXT: 2: 0000000001000188 0 NOTYPE LOCAL HIDDEN 1 __rela_iplt_end +# CHECK-NEXT: 3: 0000000001001188 0 IFUNC GLOBAL DEFAULT 2 foo +# CHECK-NEXT: 4: 000000000100118a 0 IFUNC GLOBAL DEFAULT 2 bar +# CHECK-NEXT: 5: 000000000100118c 0 NOTYPE GLOBAL DEFAULT 2 _start + +# NO-APPLY-RELOC-LABEL: Hex dump of section '.got.plt': +# NO-APPLY-RELOC-NEXT: 0x010021f0 00000000 00000000 00000000 00000000 +# NO-APPLY-RELOC-EMPTY: + +# APPLY-RELOC-LABEL: Hex dump of section '.got.plt': +# APPLY-RELOC-NEXT: 0x010021f0 00000000 01001188 00000000 0100118a +# APPLY-RELOC-EMPTY: + +# DISASM: Disassembly of section .text: +# DISASM: 0000000001001188 : +# DISASM-NEXT: br %r14 +# DISASM: 000000000100118a : +# DISASM-NEXT: br %r14 +# DISASM: 000000000100118c <_start>: +# DISASM-NEXT: brasl %r14, 0x10011b0 +# DISASM-NEXT: brasl %r14, 0x10011d0 +# DISASM-NEXT: larl %r2, 0x1000158 +# DISASM-NEXT: larl %r2, 0x1000188 +# DISASM: Disassembly of section .iplt: +# DISASM: <.iplt>: +# DISASM: 10011b0: larl %r1, 0x10021f0 +# DISASM-NEXT: 10011b6: lg %r1, 0(%r1) +# DISASM-NEXT: 10011bc: br %r1 +# DISASM: 10011d0: larl %r1, 0x10021f8 +# DISASM-NEXT: 10011d6: lg %r1, 0(%r1) +# DISASM-NEXT: 10011dc: br %r1 + +.text +.type foo STT_GNU_IFUNC +.globl foo +foo: + br %r14 + +.type bar STT_GNU_IFUNC +.globl bar +bar: + br %r14 + +.globl _start +_start: + brasl %r14, foo@plt + brasl %r14, bar@plt + larl %r2, __rela_iplt_start + larl %r2, __rela_iplt_end diff --git a/lld/test/ELF/systemz-init-padding.s b/lld/test/ELF/systemz-init-padding.s new file mode 100644 index 00000000000000..c56b98d43f1b0e --- /dev/null +++ b/lld/test/ELF/systemz-init-padding.s @@ -0,0 +1,27 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/systemz-init.s -o systemz-init.o +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o systemz-init.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn -j .init %t | FileCheck %s + +# glibc < 2.39 used to align .init and .fini code at a 4-byte boundary. +# When that happens, the linker must not pad the code with invalid +# instructions, e.g. null bytes. + .section .init,"ax",@progbits + brasl %r14, startup + +# CHECK: <.init>: +# CHECK-NEXT: brasl %r14, +# CHECK-NEXT: bcr 0, %r7 +# CHECK-NEXT: lg %r4, 272(%r15) + + .text + .globl startup + .p2align 4 +startup: + br %r14 + + .globl main + .p2align 4 +main: + br %r14 diff --git a/lld/test/ELF/systemz-pie.s b/lld/test/ELF/systemz-pie.s new file mode 100644 index 00000000000000..bb971a82fb8ced --- /dev/null +++ b/lld/test/ELF/systemz-pie.s @@ -0,0 +1,38 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t1.o + +## Check -pie. +# RUN: ld.lld -pie %t1.o -o %t +# RUN: llvm-readelf --file-headers --program-headers --dynamic %t | FileCheck %s + +# CHECK: ELF Header: +# CHECK-NEXT: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 +# CHECK-NEXT: Class: ELF64 +# CHECK-NEXT: Data: 2's complement, big endian +# CHECK-NEXT: Version: 1 (current) +# CHECK-NEXT: OS/ABI: UNIX - System V +# CHECK-NEXT: ABI Version: 0 +# CHECK-NEXT: Type: DYN (Shared object file) +# CHECK-NEXT: Machine: IBM S/390 +# CHECK-NEXT: Version: 0x1 + +# CHECK: Program Headers: +# CHECK-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R 0x8 +# CHECK-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x00020d 0x00020d R 0x1000 +# CHECK-NEXT: LOAD 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 RW 0x1000 +# CHECK-NEXT: DYNAMIC 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000090 RW 0x8 +# CHECK-NEXT: GNU_RELRO 0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 R 0x1 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x0 + +# CHECK: Dynamic section at offset 0x210 contains 9 entries: +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x000000006ffffffb (FLAGS_1) PIE + +## Check -nopie +# RUN: ld.lld -no-pie %t1.o -o %t2 +# RUN: llvm-readelf --file-headers %t2 | FileCheck %s --check-prefix=NOPIE +# NOPIE-NOT: Type: DYN + +.globl _start +_start: diff --git a/lld/test/ELF/systemz-plt.s b/lld/test/ELF/systemz-plt.s new file mode 100644 index 00000000000000..4669f01f588121 --- /dev/null +++ b/lld/test/ELF/systemz-plt.s @@ -0,0 +1,83 @@ +# REQUIRES: systemz +# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s + +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t1.s -o %t1.o +# RUN: ld.lld -shared %t1.o -soname=t1.so -o %t1.so +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o %t1.so -z separate-code -o %t +# RUN: llvm-readelf -S -s -r -x .got.plt %t | FileCheck %s +# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=DIS %s + +# CHECK: Section Headers: +# CHECK: .plt PROGBITS 0000000001001020 001020 000060 00 AX 0 0 16 +# CHECK: .got PROGBITS 00000000010020d0 0020d0 000018 00 WA 0 0 8 +# CHECK: .got.plt PROGBITS 00000000010030e8 0020e8 000010 00 WA 0 0 8 + +# CHECK: Relocation section '.rela.plt' at offset {{.*}} contains 2 entries: +# CHECK: 00000000010030e8 000000010000000b R_390_JMP_SLOT 0000000000000000 bar + 0 +# CHECK: 00000000010030f0 000000020000000b R_390_JMP_SLOT 0000000000000000 weak + 0 + +## A canonical PLT has a non-zero st_value. bar and weak are called but their +## addresses are not taken, so a canonical PLT is not necessary. +# CHECK: Symbol table '.dynsym' contains 3 entries: +# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name +# CHECK-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# CHECK-NEXT: 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND bar +# CHECK-NEXT: 2: 0000000000000000 0 FUNC WEAK DEFAULT UND weak + +## The .got.plt slots relocated by .rela.plt point to .plt +## This is required by glibc. +# CHECK: Hex dump of section '.got.plt': +# CHECK-NEXT: 0x010030e8 00000000 0100104e 00000000 0100106e + +# DIS: Disassembly of section .text: + +# DIS: 0000000001001000 <_start>: +# DIS-NEXT: brasl %r14, 0x1001012 +# DIS-NEXT: brasl %r14, 0x1001040 +# DIS-NEXT: brasl %r14, 0x1001060 + +# DIS: 0000000001001012 : +# DIS-NEXT: br %r14 + +# DIS: Disassembly of section .plt: + +# DIS: 0000000001001020 <.plt>: +# DIS-NEXT: 1001020: e3 10 f0 38 00 24 stg %r1, 56(%r15) +# DIS-NEXT: 1001026: c0 10 00 00 08 55 larl %r1, 0x10020d0 +# DIS-NEXT: 100102c: d2 07 f0 30 10 08 mvc 48(8,%r15), 8(%r1) +# DIS-NEXT: 1001032: e3 10 10 10 00 04 lg %r1, 16(%r1) +# DIS-NEXT: 1001038: 07 f1 br %r1 +# DIS-NEXT: 100103a: 07 00 bcr 0, %r0 +# DIS-NEXT: 100103c: 07 00 bcr 0, %r0 +# DIS-NEXT: 100103e: 07 00 bcr 0, %r0 +# DIS-NEXT: 1001040: c0 10 00 00 10 54 larl %r1, 0x10030e8 +# DIS-NEXT: 1001046: e3 10 10 00 00 04 lg %r1, 0(%r1) +# DIS-NEXT: 100104c: 07 f1 br %r1 +# DIS-NEXT: 100104e: 0d 10 basr %r1, 0 +# DIS-NEXT: 1001050: e3 10 10 0c 00 14 lgf %r1, 12(%r1) +# DIS-NEXT: 1001056: c0 f4 ff ff ff e5 jg 0x1001020 +# DIS-NEXT: 100105c: 00 00 +# DIS-NEXT: 100105e: 00 00 +# DIS-NEXT: 1001060: c0 10 00 00 10 48 larl %r1, 0x10030f0 +# DIS-NEXT: 1001066: e3 10 10 00 00 04 lg %r1, 0(%r1) +# DIS-NEXT: 100106c: 07 f1 br %r1 +# DIS-NEXT: 100106e: 0d 10 basr %r1, 0 +# DIS-NEXT: 1001070: e3 10 10 0c 00 14 lgf %r1, 12(%r1) +# DIS-NEXT: 1001076: c0 f4 ff ff ff d5 jg 0x1001020 +# DIS-NEXT: 100107c: 00 00 +# DIS-NEXT: 100107e: 00 18 + +.global _start, foo, bar +.weak weak + +_start: + ## Use @plt to avoid generating direct references that would force + ## allocation of a canonical PLT entry. + brasl %r14, foo@plt + brasl %r14, bar@plt + brasl %r14, weak@plt + +## foo is local and non-preemptable, no PLT is generated. +foo: + br %r14 diff --git a/lld/test/ELF/systemz-reloc-abs.s b/lld/test/ELF/systemz-reloc-abs.s new file mode 100644 index 00000000000000..b5ad94d90d3a96 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-abs.s @@ -0,0 +1,32 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs255.s -o %t255.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs256.s -o %t256.o +# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs257.s -o %t257.o + +# RUN: ld.lld %t.o %t256.o -o %t +# RUN: llvm-readelf -x .data %t | FileCheck %s +# CHECK: 0x{{[0-9a-f]+}} ff80ffff 8000ffff ffff8000 0000ffff +# CHECK-NEXT: ffffffff ffff8000 00000000 0000 + +# RUN: not ld.lld %t.o %t255.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW1 %s +# OVERFLOW1: relocation R_390_8 out of range: -129 is not in [-128, 255] +# OVERFLOW1: relocation R_390_16 out of range: -32769 is not in [-32768, 65535] +# OVERFLOW1: relocation R_390_32 out of range: -2147483649 is not in [-2147483648, 4294967295] + +# RUN: not ld.lld %t.o %t257.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW2 %s +# OVERFLOW2: relocation R_390_8 out of range: 256 is not in [-128, 255] +# OVERFLOW2: relocation R_390_16 out of range: 65536 is not in [-32768, 65535] +# OVERFLOW2: relocation R_390_32 out of range: 4294967296 is not in [-2147483648, 4294967295] + +.globl _start +_start: +.data +.byte foo - 1 +.byte foo - 384 +.word foo + 0xfeff +.word foo - 0x8100 +.long foo + 0xfffffeff +.long foo - 0x80000100 +.quad foo + 0xfffffffffffffeff +.quad foo - 0x8000000000000100 diff --git a/lld/test/ELF/systemz-reloc-disp12.s b/lld/test/ELF/systemz-reloc-disp12.s new file mode 100644 index 00000000000000..3d32707d149fe7 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-disp12.s @@ -0,0 +1,21 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=291 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4095 %s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4096 %s -o %t3.o + +# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out +# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out +# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE + +# RANGE: relocation R_390_12 out of range: 4096 is not in [0, 4095] + +# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN=58678123 --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN=58678fff --check-prefix DUMP + +# DUMP: 0x00000000 [[INSN]] + +.text +.globl _start +_start: + .reloc .+2, R_390_12, DISP + l %r6, 0(%r7,%r8) diff --git a/lld/test/ELF/systemz-reloc-disp20.s b/lld/test/ELF/systemz-reloc-disp20.s new file mode 100644 index 00000000000000..88cd657c6ae3cb --- /dev/null +++ b/lld/test/ELF/systemz-reloc-disp20.s @@ -0,0 +1,21 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=74565 %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524287 %s -o %t2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524288 %s -o %t3.o + +# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out +# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out +# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE + +# RANGE: relocation R_390_20 out of range: 524288 is not in [-524288, 524287] + +# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN="e3678345 1204" --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN="e3678fff 7f04" --check-prefix DUMP + +# DUMP: 0x00000000 [[INSN]] + +.text +.globl _start +_start: + .reloc .+2, R_390_20, DISP + lg %r6, 0(%r7,%r8) diff --git a/lld/test/ELF/systemz-reloc-got.s b/lld/test/ELF/systemz-reloc-got.s new file mode 100644 index 00000000000000..4b9ac16481f4cb --- /dev/null +++ b/lld/test/ELF/systemz-reloc-got.s @@ -0,0 +1,92 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -z norelro -shared %t.o -soname=t.so -o %t.so +## Note: Without norelro the distance between .got and .got.plt causes +## R_390_GOTPLT12 relocations to always overflow. + +# RUN: llvm-readelf -S -x .data %t.so | FileCheck %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s --check-prefix=DISASM + +# CHECK: Section Headers: +# CHECK: .got PROGBITS 0000000000002458 +# CHECK: .got.plt PROGBITS 0000000000002480 + +## Note: _GLOBAL_OFFSET_TABLE is at .got +## GOT (foo) is at .got + 24 == 0x2470 +## GOT (bar) is at .got + 32 == 0x2478 +## GOTPLT (foo) is at .got.plt + 0 == .got + 40 == 0x2480 +## GOTPLT (bar) is at .got.plt + 8 == .got + 48 == 0x2488 + +# DISASM: larl %r12, 0x2458 +# DISASM-NEXT: larl %r1, 0x2470 +# DISASM-NEXT: larl %r1, 0x2478 +# DISASM-NEXT: larl %r1, 0x2480 +# DISASM-NEXT: larl %r1, 0x2488 + +# DISASM-NEXT: l %r1, 24(%r12) +# DISASM-NEXT: l %r1, 32(%r12) +# DISASM-NEXT: l %r1, 40(%r12) +# DISASM-NEXT: l %r1, 48(%r12) +# DISASM-NEXT: lg %r1, 24(%r12) +# DISASM-NEXT: lg %r1, 32(%r12) +# DISASM-NEXT: lg %r1, 40(%r12) +# DISASM-NEXT: lg %r1, 48(%r12) + +# CHECK: Hex dump of section '.data': +# CHECK-NEXT: 00180020 00280030 00000018 00000020 +# CHECK-NEXT: 00000028 00000030 00000000 00000018 +# CHECK-NEXT: 00000000 00000020 00000000 00000028 +# CHECK-NEXT: 00000000 00000030 + +.text +larl %r12, _GLOBAL_OFFSET_TABLE_ +.reloc .+2, R_390_GOTENT, foo+2 +larl %r1, 0 +.reloc .+2, R_390_GOTENT, bar+2 +larl %r1, 0 +.reloc .+2, R_390_GOTPLTENT, foo+2 +larl %r1, 0 +.reloc .+2, R_390_GOTPLTENT, bar+2 +larl %r1, 0 +.reloc .+2, R_390_GOT12, foo +l %r1, 0(%r12) +.reloc .+2, R_390_GOT12, bar +l %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT12, foo +l %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT12, bar +l %r1, 0(%r12) +.reloc .+2, R_390_GOT20, foo +lg %r1, 0(%r12) +.reloc .+2, R_390_GOT20, bar +lg %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT20, foo +lg %r1, 0(%r12) +.reloc .+2, R_390_GOTPLT20, bar +lg %r1, 0(%r12) + +.data +.reloc ., R_390_GOT16, foo +.space 2 +.reloc ., R_390_GOT16, bar +.space 2 +.reloc ., R_390_GOTPLT16, foo +.space 2 +.reloc ., R_390_GOTPLT16, bar +.space 2 +.reloc ., R_390_GOT32, foo +.space 4 +.reloc ., R_390_GOT32, bar +.space 4 +.reloc ., R_390_GOTPLT32, foo +.space 4 +.reloc ., R_390_GOTPLT32, bar +.space 4 +.reloc ., R_390_GOT64, foo +.space 8 +.reloc ., R_390_GOT64, bar +.space 8 +.reloc ., R_390_GOTPLT64, foo +.space 8 +.reloc ., R_390_GOTPLT64, bar +.space 8 diff --git a/lld/test/ELF/systemz-reloc-gotrel.s b/lld/test/ELF/systemz-reloc-gotrel.s new file mode 100644 index 00000000000000..46669ecfa7fd01 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-gotrel.s @@ -0,0 +1,36 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: ld.lld -shared %t.o -soname=t.so -o %t.so + +# RUN: llvm-readelf -S -s -x .data %t.so | FileCheck %s + +# CHECK: Section Headers: +# CHECK: .plt PROGBITS 0000000000001290 +# CHECK: .got PROGBITS 0000000000002390 + +# CHECK: Symbol table '.symtab' +# CHECK: 0000000000001288 {{.*}} bar + +## Note: foo is the first (and only) PLT entry, which resides at .plt + 32 +## PLTOFF (foo) is (.plt + 32) - .got == 0x12b0 - 0x2390 == 0xffffef20 +## GOTOFF (bar) is bar - .got == 0x1288 - 0x2390 == 0xffffeef8 +# CHECK: Hex dump of section '.data': +# CHECK-NEXT: eef8ef20 ffffeef8 ffffef20 ffffffff +# CHECK-NEXT: ffffeef8 ffffffff ffffef20 + +bar: + br %r14 + +.data +.reloc ., R_390_GOTOFF16, bar +.space 2 +.reloc ., R_390_PLTOFF16, foo +.space 2 +.reloc ., R_390_GOTOFF, bar +.space 4 +.reloc ., R_390_PLTOFF32, foo +.space 4 +.reloc ., R_390_GOTOFF64, bar +.space 8 +.reloc ., R_390_PLTOFF64, foo +.space 8 diff --git a/lld/test/ELF/systemz-reloc-pc16.s b/lld/test/ELF/systemz-reloc-pc16.s new file mode 100644 index 00000000000000..e1dad5af239d45 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pc16.s @@ -0,0 +1,39 @@ +# REQUIRES: systemz +# RUN: rm -rf %t && split-file %s %t + +## Check recompile with -fPIC error message +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o +# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: relocation R_390_PC16 cannot be used against symbol '_shared'; recompile with -fPIC +# CHECK: >>> defined in {{.*}} +# CHECK: >>> referenced by {{.*}}:(.data+0x1) + +## Check patching of negative addends + +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32768 %t/addend.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32769 %t/addend.s -o %t/3.o + +# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out +# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out +# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE + +# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC16 out of range + +# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffff --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=8000 --check-prefix DUMP + +# DUMP: 0x00000000 [[ADDEND]] + +#--- shared.s +.data + .byte 0xe8 + .word _shared - . + +#--- addend.s +.text +.globl _start +_start: + .reloc ., R_390_PC16, .text-ADDEND + .space 2 diff --git a/lld/test/ELF/systemz-reloc-pc32.s b/lld/test/ELF/systemz-reloc-pc32.s new file mode 100644 index 00000000000000..0cb9322eb1c1b9 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pc32.s @@ -0,0 +1,39 @@ +# REQUIRES: systemz +# RUN: rm -rf %t && split-file %s %t + +## Check recompile with -fPIC error message +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o +# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: relocation R_390_PC32 cannot be used against symbol '_shared'; recompile with -fPIC +# CHECK: >>> defined in {{.*}} +# CHECK: >>> referenced by {{.*}}:(.data+0x1) + +## Check patching of negative addends + +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483648 %t/addend.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483649 %t/addend.s -o %t/3.o + +# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out +# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out +# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE + +# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC32 out of range + +# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffffffff --check-prefix DUMP +# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=80000000 --check-prefix DUMP + +# DUMP: 0x00000000 [[ADDEND]] + +#--- shared.s +.data + .byte 0xe8 + .long _shared - . + +#--- addend.s +.text +.globl _start +_start: + .reloc ., R_390_PC32, .text-ADDEND + .space 4 diff --git a/lld/test/ELF/systemz-reloc-pcdbl.s b/lld/test/ELF/systemz-reloc-pcdbl.s new file mode 100644 index 00000000000000..faee756f5e95b4 --- /dev/null +++ b/lld/test/ELF/systemz-reloc-pcdbl.s @@ -0,0 +1,68 @@ +# REQUIRES: systemz + +# RUN: llvm-mc --filetype=obj --triple=s390x-unknown-linux -mcpu=z13 %s -o %t.o + +# RUN: ld.lld %t.o --defsym foo16=pc16dbl+4 --defsym bar16=pc16dbl --defsym foo32=pc32dbl+6 --defsym bar32=pc32dbl --defsym foo12=pc12dbl+6 --defsym bar12=pc12dbl --defsym foo24=pc24dbl+6 --defsym bar24=pc24dbl -o %t +# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t | FileCheck %s --check-prefix=CHECK +# CHECK: 0000000001001120 : +# CHECK: je 0x1001124 +# CHECK: jne 0x1001120 +# CHECK: 0000000001001128 : +# CHECK: jge 0x100112e +# CHECK: jgne 0x1001128 +# CHECK: 0000000001001134 : +# CHECK: bprp 5, 0x100113a, 0x1001134 +# CHECK: bprp 6, 0x1001134, 0x100113a +# CHECK: 0000000001001140 : +# CHECK: bprp 5, 0x1001140, 0x1001146 +# CHECK: bprp 6, 0x1001146, 0x1001140 + +# RUN: ld.lld %t.o --defsym foo16=pc16dbl+0xfffe --defsym bar16=pc16dbl+4-0x10000 --defsym foo32=pc32dbl+0xfffffffe --defsym bar32=pc32dbl+6-0x100000000 --defsym foo12=pc12dbl+0xffe --defsym bar12=pc12dbl+6-0x1000 --defsym foo24=pc24dbl+0xfffffe --defsym bar24=pc24dbl+6-0x1000000 -o %t.limits +# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t.limits | FileCheck %s --check-prefix=LIMITS +# LIMITS: je 0x101111e +# LIMITS-NEXT: jne 0xff1124 +# LIMITS: jge 0x101001126 +# LIMITS-NEXT: jgne 0xffffffff0100112e +# LIMITS: bprp 5, 0x1002132, 0x1001134 +# LIMITS-NEXT: bprp 6, 0x100013a, 0x100113a +# LIMITS: bprp 5, 0x1001140, 0x200113e +# LIMITS-NEXT: bprp 6, 0x1001146, 0x1146 + +# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+0x10000 --defsym bar16=pc16dbl+4-0x10002 --defsym foo32=pc32dbl+0x100000000 --defsym bar32=pc32dbl+6-0x100000002 --defsym foo12=pc12dbl+0x1000 --defsym bar12=pc12dbl+6-0x1002 --defsym foo24=pc24dbl+0x1000000 --defsym bar24=pc24dbl+6-0x1000002 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: error: [[FILE]]:(.text+0x2): relocation R_390_PC16DBL out of range: 65536 is not in [-65536, 65535]; references 'foo16' +# ERROR-RANGE: error: [[FILE]]:(.text+0x6): relocation R_390_PC16DBL out of range: -65538 is not in [-65536, 65535]; references 'bar16' +# ERROR-RANGE: error: [[FILE]]:(.text+0xa): relocation R_390_PC32DBL out of range: 4294967296 is not in [-4294967296, 4294967295]; references 'foo32' +# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_390_PC32DBL out of range: -4294967298 is not in [-4294967296, 4294967295]; references 'bar32' +# ERROR-RANGE: error: [[FILE]]:(.text+0x15): relocation R_390_PC12DBL out of range: 4096 is not in [-4096, 4095]; references 'foo12' +# ERROR-RANGE: error: [[FILE]]:(.text+0x1b): relocation R_390_PC12DBL out of range: -4098 is not in [-4096, 4095]; references 'bar12' +# ERROR-RANGE: error: [[FILE]]:(.text+0x23): relocation R_390_PC24DBL out of range: 16777216 is not in [-16777216, 16777215]; references 'foo24' +# ERROR-RANGE: error: [[FILE]]:(.text+0x29): relocation R_390_PC24DBL out of range: -16777218 is not in [-16777216, 16777215]; references 'bar24' + +# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+1 --defsym bar16=pc16dbl-1 --defsym foo32=pc32dbl+1 --defsym bar32=pc32dbl-1 --defsym foo12=pc12dbl+1 --defsym bar12=pc12dbl-1 --defsym foo24=pc24dbl+1 --defsym bar24=pc24dbl-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-ALIGN %s +# ERROR-ALIGN: error: [[FILE]]:(.text+0x2): improper alignment for relocation R_390_PC16DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x6): improper alignment for relocation R_390_PC16DBL: 0xFFFFFFFFFFFFFFFB is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0xa): improper alignment for relocation R_390_PC32DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_390_PC32DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x15): improper alignment for relocation R_390_PC12DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x1b): improper alignment for relocation R_390_PC12DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x23): improper alignment for relocation R_390_PC24DBL: 0x1 is not aligned to 2 bytes +# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x29): improper alignment for relocation R_390_PC24DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes + +.global _start +.global pc16dbl +.global pc32dbl +.global pc12dbl +.global pc24dbl +_start: +pc16dbl: + je foo16 + jne bar16 +pc32dbl: + jge foo32 + jgne bar32 +pc12dbl: + bprp 5,foo12,0 + bprp 6,bar12,0 +pc24dbl: + bprp 5,0,foo24 + bprp 6,0,bar24 diff --git a/lld/test/ELF/systemz-tls-gd.s b/lld/test/ELF/systemz-tls-gd.s new file mode 100644 index 00000000000000..3976f55a6ae39e --- /dev/null +++ b/lld/test/ELF/systemz-tls-gd.s @@ -0,0 +1,142 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o +# RUN: echo '.tbss; .globl b, c; b: .zero 4; c:' | llvm-mc -filetype=obj -triple=s390x-unknown-linux - -o %t1.o +# RUN: ld.lld -shared -soname=t1.so %t1.o -o %t1.so + +# RUN: ld.lld -shared %t.o %t1.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=GD-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=GD %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=GD-DATA %s + +# RUN: ld.lld %t.o %t1.o -o %t.le +# RUN: llvm-readelf -r %t.le | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.le | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.le | FileCheck --check-prefix=LE-DATA %s + +# RUN: ld.lld %t.o %t1.so -o %t.ie +# RUN: llvm-readelf -r %t.ie | FileCheck --check-prefix=IE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.ie | FileCheck --check-prefix=IE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.ie | FileCheck --check-prefix=IE-DATA %s + +# GD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 6 entries: +# GD-REL: 0000000000002570 0000000200000036 R_390_TLS_DTPMOD 0000000000000008 a + 0 +# GD-REL-NEXT: 0000000000002578 0000000200000037 R_390_TLS_DTPOFF 0000000000000008 a + 0 +# GD-REL-NEXT: 0000000000002580 0000000300000036 R_390_TLS_DTPMOD 000000000000000c b + 0 +# GD-REL-NEXT: 0000000000002588 0000000300000037 R_390_TLS_DTPOFF 000000000000000c b + 0 +# GD-REL-NEXT: 0000000000002590 0000000400000036 R_390_TLS_DTPMOD 0000000000000010 c + 0 +# GD-REL-NEXT: 0000000000002598 0000000400000037 R_390_TLS_DTPOFF 0000000000000010 c + 0 + +## _GLOBAL_OFFSET_TABLE is at 0x2558 +# GD: larl %r12, 0x2558 + +## GOT offset of the TLS module ID / offset pair for a is at 0x2460 +# GD-NEXT: lgrl %r2, 0x2460 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TLS module ID / offset pair for b is at 0x2468 +# GD-NEXT: lgrl %r2, 0x2468 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TLS module ID / offset pair for c is at 0x2470 +# GD-NEXT: lgrl %r2, 0x2470 +# GD-NEXT: brasl %r14, 0x1440 +# GD-NEXT: lgf %r2, 0(%r2,%r7) + +## Constant pool holding GOT offsets of TLS module ID / offset pairs: +# a: 0x2570 / 0x18 +# b: 0x2580 / 0x28 +# c: 0x2590 / 0x38 +# GD-DATA: 2460 00000000 00000018 00000000 00000028 +# GD-DATA-NEXT: 2470 00000000 00000038 + +# NOREL: no relocations + +## _GLOBAL_OFFSET_TABLE is at 0x1002230 +# LE: larl %r12, 0x1002230 + +## TP offset of a is at 0x1002218 +# LE-NEXT: lgrl %r2, 0x1002218 +# LE-NEXT: brcl 0, +# LE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offset of b is at 0x1002220 +# LE-NEXT: lgrl %r2, 0x1002220 +# LE-NEXT: brcl 0, +# LE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offset of c is at 0x1002228 +# LE-NEXT: lgrl %r2, 0x1002228 +# LE-NEXT: brcl 0, +# LE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offsets +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002218 ffffffff fffffff8 ffffffff fffffffc +# LE-DATA-NEXT: 1002228 00000000 00000000 + + +# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 2 entries: +# IE-REL: 0000000001002430 0000000200000038 R_390_TLS_TPOFF 0000000000000000 b + 0 +# IE-REL-NEXT: 0000000001002438 0000000300000038 R_390_TLS_TPOFF 0000000000000000 c + 0 + +## _GLOBAL_OFFSET_TABLE is at 0x1002418 +# IE: larl %r12, 0x1002418 + +## TP offset of a is at 0x1002340 +# IE-NEXT: lgrl %r2, 0x1002340 +# IE-NEXT: brcl 0, +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TP offset for b is at 0x1002348 +# IE-NEXT: lgrl %r2, 0x1002348 +# IE-NEXT: lg %r2, 0(%r2,%r12) +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## GOT offset of the TP offset for c is at 0x1002350 +# IE-NEXT: lgrl %r2, 0x1002350 +# IE-NEXT: lg %r2, 0(%r2,%r12) +# IE-NEXT: lgf %r2, 0(%r2,%r7) + +## TP offsets (a) / GOT offset of TP offsets (b, c) +# a: -4 +# b: 0x1002430 / 0x18 +# c: 0x1002438 / 0x20 +# IE-DATA: 1002340 ffffffff fffffffc 00000000 00000018 +# IE-DATA-NEXT: 1002350 00000000 00000020 + + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 +larl %r12,_GLOBAL_OFFSET_TABLE_ + +lgrl %r2,.LC0 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:a +lgf %r2,0(%r2,%r7) + +lgrl %r2,.LC1 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:b +lgf %r2,0(%r2,%r7) + +lgrl %r2,.LC2 +brasl %r14,__tls_get_offset@PLT:tls_gdcall:c +lgf %r2,0(%r2,%r7) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@TLSGD +.LC1: + .quad b@TLSGD +.LC2: + .quad c@TLSGD + + .section .tbss + .globl a + .zero 8 +a: + .zero 4 diff --git a/lld/test/ELF/systemz-tls-ie.s b/lld/test/ELF/systemz-tls-ie.s new file mode 100644 index 00000000000000..85e2f24cb61f62 --- /dev/null +++ b/lld/test/ELF/systemz-tls-ie.s @@ -0,0 +1,121 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=IE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=IE %s +# RUN: llvm-objdump --section .data --full-contents %t.so | FileCheck --check-prefix=IE-DATA %s + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data --full-contents %t | FileCheck --check-prefix=LE-DATA %s +# RUN: llvm-objdump --section .got --full-contents %t | FileCheck --check-prefix=LE-GOT %s + +## With -pie we still have the R_390_RELATIVE for the data element, but all GOT +## entries should be fully resolved without any remaining R_390_TLS_TPOFF. +# RUN: ld.lld -pie %t.o -o %t.pie +# RUN: llvm-readelf -r %t.pie | FileCheck --check-prefix=PIE-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.pie | FileCheck --check-prefix=PIE %s +# RUN: llvm-objdump --section .data --full-contents %t.pie | FileCheck --check-prefix=PIE-DATA %s +# RUN: llvm-objdump --section .got --full-contents %t.pie | FileCheck --check-prefix=PIE-GOT %s + +# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 4 entries: +# IE-REL: 0000000000003478 000000000000000c R_390_RELATIVE 2460 +# IE-REL: 0000000000002460 0000000100000038 R_390_TLS_TPOFF 0000000000000008 a + 0 +# IE-REL: 0000000000002468 0000000200000038 R_390_TLS_TPOFF 000000000000000c b + 0 +# IE-REL: 0000000000002470 0000000300000038 R_390_TLS_TPOFF 0000000000000010 c + 0 + +## TP offset for a is at 0x2460 +# IE: lgrl %r1, 0x2460 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x2468 +# IE-NEXT: lgrl %r1, 0x2468 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x2470 +# IE-NEXT: lgrl %r1, 0x2470 +# IE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x2460 (relocated via R_390_RELATIVE above) +# IE-DATA: 3478 00000000 00000000 + +# NOREL: no relocations + +## TP offset for a is at 0x1002250 +# LE: lgrl %r1, 0x1002250 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x1002258 +# LE-NEXT: lgrl %r1, 0x1002258 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x1002260 +# LE-NEXT: lgrl %r1, 0x1002260 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x1002250 +# LE-DATA: 00000000 01002250 + +## TP offsets in GOT: +# a: -8 +# b: -4 +# c: 0 +# LE-GOT: 1002238 00000000 00000000 00000000 00000000 +# LE-GOT: 1002248 00000000 00000000 ffffffff fffffff8 +# LE-GOT: 1002258 ffffffff fffffffc 00000000 00000000 + +# PIE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# PIE-REL: 00000000000033d0 000000000000000c R_390_RELATIVE 23b8 + +## TP offset for a is at 0x23b8 +# PIE: lgrl %r1, 0x23b8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x23c0 +# PIE-NEXT: lgrl %r1, 0x23c0 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x23c8 +# PIE-NEXT: lgrl %r1, 0x23c8 +# PIE-NEXT: lgf %r1, 0(%r1,%r7) + +## Data element: TP offset for a is at 0x23b8 (relocated via R_390_RELATIVE above) +# PIE-DATA: 33d0 00000000 00000000 + +## TP offsets in GOT: +# a: -8 +# b: -4 +# c: 0 +# PIE-GOT: 23a0 00000000 000022d0 00000000 00000000 +# PIE-GOT: 23b0 00000000 00000000 ffffffff fffffff8 +# PIE-GOT: 23c0 ffffffff fffffffc 00000000 00000000 + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 + +lgrl %r1, a@indntpoff +lgf %r1,0(%r1,%r7) + +lgrl %r1, b@indntpoff +lgf %r1,0(%r1,%r7) + +lgrl %r1, c@indntpoff +lgf %r1,0(%r1,%r7) + + .data + .reloc .,R_390_TLS_IE64,a + .space 8 + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/ELF/systemz-tls-ld.s b/lld/test/ELF/systemz-tls-ld.s new file mode 100644 index 00000000000000..2cb36d7294f2b0 --- /dev/null +++ b/lld/test/ELF/systemz-tls-ld.s @@ -0,0 +1,114 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=LD-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=LD %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=LD-DATA %s + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s + +# LD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries: +# LD-REL: 00000000000024f8 0000000000000036 R_390_TLS_DTPMOD 0 + +## _GLOBAL_OFFSET_TABLE is at 0x24e0 +# LD: larl %r12, 0x24e0 + +## GOT offset of the LDM TLS module ID is at 0x23e0 +# LD-NEXT: lgrl %r2, 0x23e0 +# LD-NEXT: brasl %r14, 0x13c0 +# LD-NEXT: la %r2, 0(%r2,%r7) + +## DTP offset for a is at 0x23e8 +# LD-NEXT: lgrl %r1, 0x23e8 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## DTP offset for b is at 0x23f0 +# LD-NEXT: lgrl %r1, 0x23f0 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## DTP offset for c is at 0x23f8 +# LD-NEXT: lgrl %r1, 0x23f8 +# LD-NEXT: lgf %r1, 0(%r1,%r2) + +## Constant pool holding GOT offsets of TLS module ID and DTP offsets: +# TLS module ID: 0x24f8 / 0x18 +# a: 8 +# b: 12 +# c: 16 +# LD-DATA: 23e0 00000000 00000018 00000000 00000008 +# LD-DATA: 23f0 00000000 0000000c 00000000 00000010 + +# NOREL: no relocations + +## _GLOBAL_OFFSET_TABLE is at 0x1002230 +# LE: larl %r12, 0x1002230 + +## GOT offset of the LDM TLS module ID is at 0x1002210 +# LE-NEXT: lgrl %r2, 0x1002210 +# LE-NEXT: brcl 0, +# LE-NEXT: la %r2, 0(%r2,%r7) + +## TP offset for a is at 0x1002218 +# LE-NEXT: lgrl %r1, 0x1002218 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## TP offset for b is at 0x1002220 +# LE-NEXT: lgrl %r1, 0x1002220 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## TP offset for c is at 0x1002228 +# LE-NEXT: lgrl %r1, 0x1002228 +# LE-NEXT: lgf %r1, 0(%r1,%r2) + +## zeroed LDM / TP offsets: +# LDM TLS: 0 +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002210 00000000 00000000 ffffffff fffffff8 +# LE-DATA: 1002220 ffffffff fffffffc 00000000 00000000 + + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 +larl %r12,_GLOBAL_OFFSET_TABLE_ + +lgrl %r2,.LC0 +brasl %r14,__tls_get_offset@PLT:tls_ldcall:a +la %r2,0(%r2,%r7) + +lgrl %r1, .LC1 +lgf %r1,0(%r1,%r2) + +lgrl %r1, .LC2 +lgf %r1,0(%r1,%r2) + +lgrl %r1, .LC3 +lgf %r1,0(%r1,%r2) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@TLSLDM +.LC1: + .quad a@DTPOFF +.LC2: + .quad b@DTPOFF +.LC3: + .quad c@DTPOFF + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/ELF/systemz-tls-le.s b/lld/test/ELF/systemz-tls-le.s new file mode 100644 index 00000000000000..9e41fc768da391 --- /dev/null +++ b/lld/test/ELF/systemz-tls-le.s @@ -0,0 +1,61 @@ +# REQUIRES: systemz +# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o + +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s +# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s + +# NOREL: no relocations + +## TP offset for a is at 0x1002200 +# LE: lgrl %r1, 0x1002200 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for b is at 0x1002208 +# LE-NEXT: lgrl %r1, 0x1002208 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offset for c is at 0x1002210 +# LE-NEXT: lgrl %r1, 0x1002210 +# LE-NEXT: lgf %r1, 0(%r1,%r7) + +## TP offsets: +# a: -8 +# b: -4 +# c: 0 +# LE-DATA: 1002200 ffffffff fffffff8 ffffffff fffffffc +# LE-DATA: 1002210 00000000 00000000 + +ear %r7,%a0 +sllg %r7,%r1,32 +ear %r7,%a1 + +lgrl %r1, .LC0 +lgf %r1,0(%r1,%r7) + +lgrl %r1, .LC1 +lgf %r1,0(%r1,%r7) + +lgrl %r1, .LC2 +lgf %r1,0(%r1,%r7) + + .section .data.rel.ro,"aw" + .align 8 +.LC0: + .quad a@ntpoff +.LC1: + .quad b@ntpoff +.LC2: + .quad c@ntpoff + + .section .tbss + .globl a + .globl b + .globl c + .zero 8 +a: + .zero 4 +b: + .zero 4 +c: diff --git a/lld/test/MachO/invalid/invalid-lto-object-path.ll b/lld/test/MachO/invalid/invalid-lto-object-path.ll index 75c6a97e446fb2..c862538d592ce8 100644 --- a/lld/test/MachO/invalid/invalid-lto-object-path.ll +++ b/lld/test/MachO/invalid/invalid-lto-object-path.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ;; Creating read-only directories with `chmod 400` isn't supported on Windows ; UNSUPPORTED: system-windows diff --git a/lld/test/MachO/thinlto-emit-imports.ll b/lld/test/MachO/thinlto-emit-imports.ll index 47a612bd0a7b56..88f766f59c8877 100644 --- a/lld/test/MachO/thinlto-emit-imports.ll +++ b/lld/test/MachO/thinlto-emit-imports.ll @@ -1,4 +1,4 @@ -; REQUIRES: x86 +; REQUIRES: x86, non-root-user ; RUN: rm -rf %t; split-file %s %t ; Generate summary sections and test lld handling. diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 559a32bfa242f8..057de2a22f6a0c 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -409,6 +409,13 @@ LTO_OPTS: -mllvm:-mcpu=x86-64 -opt:lldlto=2 -dwodir:foo -lto-cs-profile-generate RUN: ld.lld -### foo.o -m i386pep --lto-O2 --lto-CGO1 --lto-cs-profile-generate --lto-cs-profile-file=foo 2>&1 | FileCheck -check-prefix=LTO_OPTS2 %s LTO_OPTS2:-opt:lldlto=2 -opt:lldltocgo=1 -lto-cs-profile-generate -lto-cs-profile-file:foo +RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s +RUN: ld.lld -### foo.o -m i386pe --lto-emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s +LTO_EMIT_ASM: -lldemit:asm + +RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-llvm 2>&1 | FileCheck -check-prefix=LTO_EMIT_LLVM %s +LTO_EMIT_LLVM: -lldemit:llvm + Test GCC specific LTO options that GCC passes unconditionally, that we ignore. RUN: ld.lld -### foo.o -m i386pep -plugin /usr/lib/gcc/x86_64-w64-mingw32/10-posix/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-w64-mingw32/10-posix/lto-wrapper -plugin-opt=-fresolution=/tmp/ccM9d4fP.res -plugin-opt=-pass-through=-lmingw32 2> /dev/null diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index b3e07f1f823cc4..d309c2ad4ee284 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -83,6 +83,7 @@ "PowerPC": "ppc", "RISCV": "riscv", "Sparc": "sparc", + "SystemZ": "systemz", "WebAssembly": "wasm", "X86": "x86", }, diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index c2fb77d5a371f4..44f2850b92d522 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -35,7 +35,7 @@ endif() if(NOT DEFINED LLVM_SHLIB_SYMBOL_VERSION) # "Symbol version prefix for libLLVM.so" - set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}") + set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") endif() if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (MSVC_TOOLSET_VERSION LESS 142) AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 5e989618552824..5fc663d10b8f77 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -108,7 +108,7 @@ function(add_llvm_symbol_exports target_name export_file) COMMAND "${Python3_EXECUTABLE}" "-c" "import sys; \ lines = [' ' + l.rstrip() for l in sys.stdin] + [' local: *;']; \ - print('LLVM_${LLVM_VERSION_MAJOR} {'); \ + print('LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {'); \ print(' global:') if len(lines) > 1 else None; \ print(';\\n'.join(lines) + '\\n};')" < ${export_file} > ${native_export_file} @@ -646,9 +646,9 @@ function(llvm_add_library name) if(UNIX AND NOT APPLE AND NOT ARG_SONAME) set_target_properties(${name} PROPERTIES - # Since 4.0.0, the ABI version is indicated by the major version - SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} - VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + # Since 18.1.0, the ABI version is indicated by the major and minor version. + SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX} + VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) endif() endif() diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 82f4a7a15c9c13..5b3210138f2f89 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -164,6 +164,30 @@ Changes to the MIPS Backend Changes to the PowerPC Backend ------------------------------ +* LLJIT's JIT linker now defaults to JITLink on 64-bit ELFv2 targets. +* Initial-exec TLS model is supported on AIX. +* Implemented new resource based scheduling model of POWER7 and POWER8. +* ``frexp`` libcall now references correct symbol name for ``fp128``. +* Optimized materialization of 64-bit immediates, code generation of + ``vec_promote`` and atomics. +* Global constant strings are pooled in the TOC under one entry to reduce the + number of entries in the TOC. +* Added a number of missing Power10 extended mnemonics. +* Added the SCV instruction. +* Fixed register class for the paddi instruction. +* Optimize VPERM and fix code order for swapping vector operands on LE. +* Added various bug fixes and code gen improvements. + +AIX Support/improvements: + +* Support for a non-TOC-based access sequence for the local-exec TLS model (called small local-exec). +* XCOFF toc-data peephole optimization and bug fixes. +* Move less often used __ehinfo TOC entries to the end of the TOC section. +* Fixed problems when the AIX libunwind unwinds starting from a signal handler + and the function that raised the signal happens to be a leaf function that + shares the stack frame with its caller or a leaf function that does not store + the stack frame backchain. + Changes to the RISC-V Backend ----------------------------- @@ -317,6 +341,7 @@ Changes to the LLVM tools * llvm-symbolizer now treats invalid input as an address for which source information is not found. +* Fixed big-endian support in llvm-symbolizer's DWARF location parser. * llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra information (section name) when showing symbols. @@ -328,6 +353,13 @@ Changes to the LLVM tools * llvm-objcopy now supports ``--gap-fill`` and ``--pad-to`` options, for ELF input and binary output files only. +* Supported parsing XCOFF auxiliary symbols in obj2yaml. + +* ``llvm-ranlib`` now supports ``-X`` on AIX to specify the type of object file + ranlib should examine. + +* ``llvm-nm`` now supports ``--export-symbol`` to ignore the import symbol file. + * llvm-rc and llvm-windres now accept file path references in ``.rc`` files concatenated from multiple string literals. @@ -387,6 +419,12 @@ Changes to Sanitizers --------------------- * HWASan now defaults to detecting use-after-scope bugs. +Changes to the Profile Runtime +------------------------------ + +* Public header ``profile/instr_prof_interface.h`` is added to declare four + API functions to fine tune profile collection. + Other Changes ------------- diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index af3ad822e0b0de..0880f9c65aa45d 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1314,6 +1314,13 @@ class ScalarEvolution { void getPoisonGeneratingValues(SmallPtrSetImpl &Result, const SCEV *S); + /// Check whether it is poison-safe to represent the expression S using the + /// instruction I. If such a replacement is performed, the poison flags of + /// instructions in DropPoisonGeneratingInsts must be dropped. + bool canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts); + class FoldID { const SCEV *Op = nullptr; const Type *Ty = nullptr; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 81cdd39afc6bab..f17ba75e3efa6a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1464,6 +1464,7 @@ enum { PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data. PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations. PT_OPENBSD_NOBTCFI = 0x65a3dbe8, // Do not enforce branch target CFI. + PT_OPENBSD_SYSCALLS = 0x65a3dbe9, // System call sites. PT_OPENBSD_BOOTDATA = 0x65a41be6, // Section for boot arguments. // ARM program header types. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 2acb45837c480a..4b2db80bc1ec30 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4184,6 +4184,68 @@ void ScalarEvolution::getPoisonGeneratingValues( Result.insert(SU->getValue()); } +bool ScalarEvolution::canReuseInstruction( + const SCEV *S, Instruction *I, + SmallVectorImpl &DropPoisonGeneratingInsts) { + // If the instruction cannot be poison, it's always safe to reuse. + if (programUndefinedIfPoison(I)) + return true; + + // Otherwise, it is possible that I is more poisonous that S. Collect the + // poison-contributors of S, and then check whether I has any additional + // poison-contributors. Poison that is contributed through poison-generating + // flags is handled by dropping those flags instead. + SmallPtrSet PoisonVals; + getPoisonGeneratingValues(PoisonVals, S); + + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(I); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + // Avoid walking large instruction graphs. + if (Visited.size() > 16) + return false; + + // Either the value can't be poison, or the S would also be poison if it + // is. + if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) + continue; + + auto *I = dyn_cast(V); + if (!I) + return false; + + // Disjoint or instructions are interpreted as adds by SCEV. However, we + // can't replace an arbitrary add with disjoint or, even if we drop the + // flag. We would need to convert the or into an add. + if (auto *PDI = dyn_cast(I)) + if (PDI->isDisjoint()) + return false; + + // FIXME: Ignore vscale, even though it technically could be poison. Do this + // because SCEV currently assumes it can't be poison. Remove this special + // case once we proper model when vscale can be poison. + if (auto *II = dyn_cast(I); + II && II->getIntrinsicID() == Intrinsic::vscale) + continue; + + if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) + return false; + + // If the instruction can't create poison, we can recurse to its operands. + if (I->hasPoisonGeneratingFlagsOrMetadata()) + DropPoisonGeneratingInsts.push_back(I); + + for (Value *Op : I->operands()) + Worklist.push_back(Op); + } + return true; +} + const SCEV * ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind, SmallVectorImpl &Ops) { diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5d6c3465a0c364..412115eb649c2f 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5083,8 +5083,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); // All subnormal inputs should be in the normal range in the result type. - if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) + if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) { + if (Known.KnownFPClasses & fcPosSubnormal) + Known.KnownFPClasses |= fcPosNormal; + if (Known.KnownFPClasses & fcNegSubnormal) + Known.KnownFPClasses |= fcNegNormal; Known.knownNot(fcSubnormal); + } // Sign bit of a nan isn't guaranteed. if (!Known.isKnownNeverNaN()) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3b2cf319109273..c0c22e36004f72 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -596,6 +596,8 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, LostDebugLocObserver &LocObserver, MachineInstr *MI) { auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); + if (!Name) + return LegalizerHelper::UnableToLegalize; const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 98d8a6d9409f25..3135ec73a99e76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9253,7 +9253,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) - DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); + DAG.makeEquivalentMemoryOrdering(L, NewLoad); if (!NeedsBswap) return NewLoad; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2d6b5bc983e739..0287856560e91a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10718,6 +10718,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); + if (Constraint == "{za}") { + return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass); + } + + if (Constraint == "{zt0}") { + return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); + } + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index f86e6947c9cdb0..48e1c1bc73022c 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16)) return true; + // ZA/ZT0 registers are reserved but may be permitted in the clobber list. + if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0) + return true; + return !isReservedReg(MF, PhysReg); } diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3c673ae938fdec..36aab383da68d2 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -2920,6 +2920,11 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, (Res.getSymA()->getSymbol().isELF() && cast(Res.getSymA()->getSymbol()).getBinding() == ELF::STB_LOCAL); + // For O32, "$"-prefixed symbols are recognized as temporary while + // .L-prefixed symbols are not (PrivateGlobalPrefix is "$"). Recognize ".L" + // manually. + if (ABI.IsO32() && Res.getSymA()->getSymbol().getName().starts_with(".L")) + IsLocalSym = true; bool UseXGOT = STI->hasFeature(Mips::FeatureXGOT) && !IsLocalSym; // The case where the result register is $25 is somewhat special. If the @@ -6359,7 +6364,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); // Otherwise create a symbol reference. const MCExpr *SymRef = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 26ed74108ec36c..18a4223d481ef0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1635,7 +1635,8 @@ class BitPermutationSelector { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { - unsigned RotAmt = V.getConstantOperandVal(1); + assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!"); + unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; @@ -1648,15 +1649,20 @@ class BitPermutationSelector { case ISD::SHL: case PPCISD::SHL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // sld takes 7 bits, slw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = ShiftAmt; i < NumBits; ++i) - Bits[i] = LHSBits[i - ShiftAmt]; - - for (unsigned i = 0; i < ShiftAmt; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = ShiftAmt; i < NumBits; ++i) + Bits[i] = LHSBits[i - ShiftAmt]; + for (unsigned i = 0; i < ShiftAmt; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } @@ -1664,15 +1670,20 @@ class BitPermutationSelector { case ISD::SRL: case PPCISD::SRL: if (isa(V.getOperand(1))) { - unsigned ShiftAmt = V.getConstantOperandVal(1); + // srd takes lowest 7 bits, srw takes 6. + unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; - for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) - Bits[i] = LHSBits[i + ShiftAmt]; - - for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) - Bits[i] = ValueBit(ValueBit::ConstZero); + if (ShiftAmt >= NumBits) { + for (unsigned i = 0; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } else { + for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) + Bits[i] = LHSBits[i + ShiftAmt]; + for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + } return std::make_pair(Interesting = true, &Bits); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7895d74f06d12a..80447d03c000b8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5033,60 +5033,56 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, MVT IndexContainerVT = ContainerVT.changeVectorElementType(IndexVT.getScalarType()); - // Base case for the recursion just below - handle the worst case - // single source permutation. Note that all the splat variants - // are handled above. - if (V2.isUndef()) { + SDValue Gather; + // TODO: This doesn't trigger for i64 vectors on RV32, since there we + // encounter a bitcasted BUILD_VECTOR with low/high i32 values. + if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { + Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, + Subtarget); + } else { V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); - LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG, - Subtarget); - SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, - DAG.getUNDEF(ContainerVT), TrueMask, VL); - return convertFromScalableVector(VT, Gather, DAG, Subtarget); - } - - // Translate the gather index we computed above (and possibly swapped) - // back to a shuffle mask. This step should disappear once we complete - // the migration to recursive design. - SmallVector ShuffleMaskLHS; - ShuffleMaskLHS.reserve(GatherIndicesLHS.size()); - for (SDValue GatherIndex : GatherIndicesLHS) { - if (GatherIndex.isUndef()) { - ShuffleMaskLHS.push_back(-1); - continue; + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (LHSIndexCounts.size() == 1) { + int SplatIndex = LHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, + DAG.getConstant(SplatIndex, DL, XLenVT), + DAG.getUNDEF(ContainerVT), TrueMask, VL); + } else { + SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); + LHSIndices = + convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); + + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, + DAG.getUNDEF(ContainerVT), TrueMask, VL); } - auto *IdxC = cast(GatherIndex); - ShuffleMaskLHS.push_back(IdxC->getZExtValue()); } - // Recursively invoke lowering for the LHS as if there were no RHS. - // This allows us to leverage all of our single source permute tricks. - SDValue Gather = - DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS); - Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget); + // If a second vector operand is used by this shuffle, blend it in with an + // additional vrgather. + if (!V2.isUndef()) { + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); - // Blend in second vector source with an additional vrgather. - V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); + SelectMask = + convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); - MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); - SelectMask = - convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); - - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (RHSIndexCounts.size() == 1) { - int SplatIndex = RHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, - DAG.getConstant(SplatIndex, DL, XLenVT), Gather, - SelectMask, VL); - } else { - SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); - RHSIndices = - convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, - SelectMask, VL); + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (RHSIndexCounts.size() == 1) { + int SplatIndex = RHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, + DAG.getConstant(SplatIndex, DL, XLenVT), Gather, + SelectMask, VL); + } else { + SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); + RHSIndices = + convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, + SelectMask, VL); + } } return convertFromScalableVector(VT, Gather, DAG, Subtarget); @@ -14658,8 +14654,8 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, ISD::CondCode CC = cast(Cond.getOperand(2))->get(); if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && isa(LHS.getOperand(1)) && isNullConstant(RHS)) { - uint64_t MaskVal = LHS.getConstantOperandVal(1); - if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal)) + const APInt &MaskVal = LHS.getConstantOperandAPInt(1); + if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12)) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), False, True); @@ -15565,7 +15561,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, MGN->getMemOperand(), IndexType, MGN->getExtensionType()); if (Index.getOpcode() == ISD::BUILD_VECTOR && - MGN->getExtensionType() == ISD::NON_EXTLOAD) { + MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { if (std::optional SimpleVID = isSimpleVIDSequence(Index); SimpleVID && SimpleVID->StepDenominator == 1) { const int64_t StepNumerator = SimpleVID->StepNumerator; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 592962cebe8973..d5b1ddfbeb3dc9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1229,7 +1229,8 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend(); auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool { int64_t Imm; - return isLoadImm(&I, Imm) && Imm == C1; + return isLoadImm(&I, Imm) && Imm == C1 && + I.getOperand(0).getReg().isVirtual(); }); if (DefC1 != E) return DefC1->getOperand(0).getReg(); diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 7b103395652433..38a59e650f33c7 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -72,6 +72,20 @@ def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", //==== Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" +//==== Register allocation tweaks needed by some low-level software +foreach i = 1 ... 7 in + def FeatureReserveG#i : SubtargetFeature<"reserve-g"#i, "ReserveRegister["#i#" + SP::G0]", "true", + "Reserve G"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 5 in + def FeatureReserveO#i : SubtargetFeature<"reserve-o"#i, "ReserveRegister["#i#" + SP::O0]", "true", + "Reserve O"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 7 in + def FeatureReserveL#i : SubtargetFeature<"reserve-l"#i, "ReserveRegister["#i#" + SP::L0]", "true", + "Reserve L"#i#", making it unavailable as a GPR">; +foreach i = 0 ... 5 in + def FeatureReserveI#i : SubtargetFeature<"reserve-i"#i, "ReserveRegister["#i#" + SP::I0]", "true", + "Reserve I"#i#", making it unavailable as a GPR">; + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 78bdf3ae9a84ba..bdefb0841a124b 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -13,6 +13,7 @@ #include "SparcISelLowering.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "SparcMachineFunctionInfo.h" #include "SparcRegisterInfo.h" #include "SparcTargetMachine.h" @@ -28,6 +29,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" @@ -729,6 +731,30 @@ SDValue SparcTargetLowering::LowerFormalArguments_64( return Chain; } +// Check whether any of the argument registers are reserved +static bool isAnyArgRegReserved(const SparcRegisterInfo *TRI, + const MachineFunction &MF) { + // The register window design means that outgoing parameters at O* + // will appear in the callee as I*. + // Be conservative and check both sides of the register names. + bool Outgoing = + llvm::any_of(SP::GPROutgoingArgRegClass, [TRI, &MF](MCPhysReg r) { + return TRI->isReservedReg(MF, r); + }); + bool Incoming = + llvm::any_of(SP::GPRIncomingArgRegClass, [TRI, &MF](MCPhysReg r) { + return TRI->isReservedReg(MF, r); + }); + return Outgoing || Incoming; +} + +static void emitReservedArgRegCallError(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + F.getContext().diagnose(DiagnosticInfoUnsupported{ + F, ("SPARC doesn't support" + " function calls if any of the argument registers is reserved.")}); +} + SDValue SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -805,6 +831,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1055,6 +1082,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CallConv) : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv)); + + if (isAnyArgRegReserved(TRI, MF)) + emitReservedArgRegCallError(MF); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1125,6 +1156,13 @@ Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT, .Case("g4", SP::G4).Case("g5", SP::G5).Case("g6", SP::G6).Case("g7", SP::G7) .Default(0); + // If we're directly referencing register names + // (e.g in GCC C extension `register int r asm("g1");`), + // make sure that said register is in the reserve list. + const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->isReservedReg(MF, Reg)) + Reg = 0; + if (Reg) return Reg; @@ -1189,6 +1227,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; auto PtrVT = getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1372,6 +1411,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv) : TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv)); + + if (isAnyArgRegReserved(TRI, MF)) + emitReservedArgRegCallError(MF); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index f97bf57627d1aa..71a27f77d2c6bf 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -12,10 +12,8 @@ #include "SparcRegisterInfo.h" #include "Sparc.h" -#include "SparcMachineFunctionInfo.h" #include "SparcSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -98,9 +96,21 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (unsigned n = 0; n < 31; n++) Reserved.set(SP::ASR1 + n); + for (TargetRegisterClass::iterator i = SP::IntRegsRegClass.begin(); + i != SP::IntRegsRegClass.end(); ++i) { + if (MF.getSubtarget().isRegisterReserved(*i)) + markSuperRegs(Reserved, *i); + } + + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } +bool SparcRegisterInfo::isReservedReg(const MachineFunction &MF, + MCRegister Reg) const { + return getReservedRegs(MF)[Reg]; +} + const TargetRegisterClass* SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h index 5b3c1a7ad07dd5..58c85f33635f2d 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h @@ -30,6 +30,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const; const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/llvm/lib/Target/Sparc/SparcRegisterInfo.td index d5ba7464695c5f..d8319a8d41dda0 100644 --- a/llvm/lib/Target/Sparc/SparcRegisterInfo.td +++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.td @@ -370,6 +370,10 @@ def LowQFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 7)>; // Floating point control register classes. def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>; +// GPR argument registers. +def GPROutgoingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "O%u", 0, 5)>; +def GPRIncomingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "I%u", 0, 5)>; + let isAllocatable = 0 in { // Ancillary state registers // FIXME: TICK is special-cased here as it can be accessed diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp index 6b09904ca5e8e5..5b65e34e0f8a36 100644 --- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -50,6 +50,7 @@ SparcSubtarget::SparcSubtarget(const StringRef &CPU, const StringRef &TuneCPU, const StringRef &FS, const TargetMachine &TM, bool is64Bit) : SparcGenSubtargetInfo(TM.getTargetTriple(), CPU, TuneCPU, FS), + ReserveRegister(TM.getMCRegisterInfo()->getNumRegs()), TargetTriple(TM.getTargetTriple()), Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)), TLInfo(TM, *this), FrameLowering(*this) {} diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h b/llvm/lib/Target/Sparc/SparcSubtarget.h index cdb210f67482c4..fe4aca5195306a 100644 --- a/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -13,12 +13,14 @@ #ifndef LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H #define LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "SparcFrameLowering.h" #include "SparcISelLowering.h" #include "SparcInstrInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/TargetParser/Triple.h" #include @@ -29,6 +31,10 @@ namespace llvm { class StringRef; class SparcSubtarget : public SparcGenSubtargetInfo { + // ReserveRegister[i] - Register #i is not available as a general purpose + // register. + BitVector ReserveRegister; + Triple TargetTriple; virtual void anchor(); @@ -82,6 +88,10 @@ class SparcSubtarget : public SparcGenSubtargetInfo { return is64Bit() ? 2047 : 0; } + bool isRegisterReserved(MCPhysReg PhysReg) const { + return ReserveRegister[PhysReg]; + } + /// Given a actual stack size as determined by FrameInfo, this function /// returns adjusted framesize which includes space for register window /// spills and arguments. diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 733f290b1bc93a..633fcb3314c42f 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -470,6 +470,9 @@ class LowerTypeTestsModule { Function *WeakInitializerFn = nullptr; + GlobalVariable *GlobalAnnotation; + DenseSet FunctionAnnotations; + bool shouldExportConstantsAsAbsoluteSymbols(); uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); @@ -531,6 +534,10 @@ class LowerTypeTestsModule { /// replace each use, which is a direct function call. void replaceDirectCalls(Value *Old, Value *New); + bool isFunctionAnnotation(Value *V) const { + return FunctionAnnotations.contains(V); + } + public: LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary, @@ -1377,8 +1384,11 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( // (all?) targets. Switch to a runtime initializer. SmallSetVector GlobalVarUsers; findGlobalVariableUsersOf(F, GlobalVarUsers); - for (auto *GV : GlobalVarUsers) + for (auto *GV : GlobalVarUsers) { + if (GV == GlobalAnnotation) + continue; moveInitializerToModuleConstructor(GV); + } // Can not RAUW F with an expression that uses F. Replace with a temporary // placeholder first. @@ -1837,6 +1847,16 @@ LowerTypeTestsModule::LowerTypeTestsModule( } OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); + + // Function annotation describes or applies to function itself, and + // shouldn't be associated with jump table thunk generated for CFI. + GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations"); + if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) { + const ConstantArray *CA = + cast(GlobalAnnotation->getInitializer()); + for (Value *Op : CA->operands()) + FunctionAnnotations.insert(Op); + } } bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) { @@ -1896,10 +1916,14 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, if (isa(U.getUser())) continue; - // Skip direct calls to externally defined or non-dso_local functions + // Skip direct calls to externally defined or non-dso_local functions. if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; + // Skip function annotation. + if (isFunctionAnnotation(U.getUser())) + continue; + // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. if (auto *C = dyn_cast(U.getUser())) { diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 249f4a7710e046..5d207dcfd18dd4 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2594,10 +2594,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *V; if ((has_single_bit(TyAllocSize) && match(GEP.getOperand(1), - m_Exact(m_AShr(m_Value(V), - m_SpecificInt(countr_zero(TyAllocSize)))))) || + m_Exact(m_Shr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || match(GEP.getOperand(1), - m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { GetElementPtrInst *NewGEP = GetElementPtrInst::Create( Builder.getInt8Ty(), GEP.getPointerOperand(), V); NewGEP->setIsInBounds(GEP.isInBounds()); diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index a1d7f0f9ba0f74..a3951fdf8a1589 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1366,61 +1366,6 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { return V; } -static bool -canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, - SmallVectorImpl &DropPoisonGeneratingInsts) { - // If the instruction cannot be poison, it's always safe to reuse. - if (programUndefinedIfPoison(I)) - return true; - - // Otherwise, it is possible that I is more poisonous that S. Collect the - // poison-contributors of S, and then check whether I has any additional - // poison-contributors. Poison that is contributed through poison-generating - // flags is handled by dropping those flags instead. - SmallPtrSet PoisonVals; - SE.getPoisonGeneratingValues(PoisonVals, S); - - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(I); - while (!Worklist.empty()) { - Value *V = Worklist.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - // Avoid walking large instruction graphs. - if (Visited.size() > 16) - return false; - - // Either the value can't be poison, or the S would also be poison if it - // is. - if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V)) - continue; - - auto *I = dyn_cast(V); - if (!I) - return false; - - // FIXME: Ignore vscale, even though it technically could be poison. Do this - // because SCEV currently assumes it can't be poison. Remove this special - // case once we proper model when vscale can be poison. - if (auto *II = dyn_cast(I); - II && II->getIntrinsicID() == Intrinsic::vscale) - continue; - - if (canCreatePoison(cast(I), /*ConsiderFlagsAndMetadata*/ false)) - return false; - - // If the instruction can't create poison, we can recurse to its operands. - if (I->hasPoisonGeneratingFlagsOrMetadata()) - DropPoisonGeneratingInsts.push_back(I); - - for (Value *Op : I->operands()) - Worklist.push_back(Op); - } - return true; -} - Value *SCEVExpander::FindValueInExprValueMap( const SCEV *S, const Instruction *InsertPt, SmallVectorImpl &DropPoisonGeneratingInsts) { @@ -1448,7 +1393,7 @@ Value *SCEVExpander::FindValueInExprValueMap( continue; // Make sure reusing the instruction is poison-safe. - if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts)) + if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts)) return V; DropPoisonGeneratingInsts.clear(); } diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 0ed3324a27b6c9..1b142f14d81139 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -713,8 +714,11 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand) { if (!SE->isSCEVable(UseInst->getType()) || - (UseInst->getType() != IVOperand->getType()) || - (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) + UseInst->getType() != IVOperand->getType()) + return false; + + const SCEV *UseSCEV = SE->getSCEV(UseInst); + if (UseSCEV != SE->getSCEV(IVOperand)) return false; // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the @@ -742,6 +746,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) return false; + // Make sure the operand is not more poisonous than the instruction. + if (!impliesPoison(IVOperand, UseInst)) { + SmallVector DropPoisonGeneratingInsts; + if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts)) + return false; + + for (Instruction *I : DropPoisonGeneratingInsts) + I->dropPoisonGeneratingFlagsAndMetadata(); + } + LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); SE->forgetValue(UseInst); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 601d2454c1e163..0a9e2c7f49f55f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10215,8 +10215,18 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { UniqueBases.insert(VecBase); // If the only one use is vectorized - can delete the extractelement // itself. - if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) { - return !R.ScalarToTreeEntry.count(U); + if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) || + any_of(EI->users(), [&](User *U) { + const TreeEntry *UTE = R.getTreeEntry(U); + return !UTE || R.MultiNodeScalars.contains(U) || + count_if(R.VectorizableTree, + [&](const std::unique_ptr &TE) { + return any_of(TE->UserTreeIndices, + [&](const EdgeInfo &Edge) { + return Edge.UserTE == UTE; + }) && + is_contained(TE->Scalars, EI); + }) != 1; })) continue; R.eraseInstruction(EI); diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll new file mode 100644 index 00000000000000..a8cba7dc9a91e9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s + +define void @alpha( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"() + ret void +} + +define void @beta( %x) local_unnamed_addr { +entry: +; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0 + tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"() + ret void +} diff --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll new file mode 100644 index 00000000000000..3eb74f8394ec4e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s + +define double @powi_f64(double %a, i32 %b) { +; CHECK-LABEL: powi_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf d1, w0 +; CHECK-NEXT: b pow +entry: + %c = call double @llvm.powi.f64.i32(double %a, i32 %b) + ret double %c +} + +define float @powi_f32(float %a, i32 %b) { +; CHECK-LABEL: powi_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: scvtf s1, w0 +; CHECK-NEXT: b powf +entry: + %c = call float @llvm.powi.f32.i32(float %a, i32 %b) + ret float %c +} + +define half @powi_f16(half %a, i32 %b) { +; CHECK-LABEL: powi_f16: +; CHECK: .seh_proc powi_f16 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: scvtf s1, w0 +; CHECK-NEXT: bl powf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call half @llvm.powi.f16.i32(half %a, i32 %b) + ret half %c +} + +define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) { +; CHECK-LABEL: powi_v2f64: +; CHECK: .seh_proc powi_v2f64 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: scvtf d8, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: fmov d1, d8 +; CHECK-NEXT: bl pow +; CHECK-NEXT: fmov d1, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %a, i32 %b) + ret <2 x double> %c +} + +define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) { +; CHECK-LABEL: powi_v2f32: +; CHECK: .seh_proc powi_v2f32 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: str d8, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: scvtf s8, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: fmov s1, s8 +; CHECK-NEXT: bl powf +; CHECK-NEXT: fmov s1, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl powf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr d8, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_freg d8, 40 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 32 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .seh_stackalloc 48 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +entry: + %c = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 %b) + ret <2 x float> %c +} + +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) +declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) +declare double @llvm.powi.f64.i32(double, i32) +declare float @llvm.powi.f32.i32(float, i32) +declare half @llvm.powi.f16.i32(half, i32) diff --git a/llvm/test/CodeGen/Mips/hf1_body.ll b/llvm/test/CodeGen/Mips/hf1_body.ll index 184ea31bddc9d2..c3dea67896210a 100644 --- a/llvm/test/CodeGen/Mips/hf1_body.ll +++ b/llvm/test/CodeGen/Mips/hf1_body.ll @@ -23,8 +23,8 @@ entry: ; ALL: .set reorder ; ALL: .reloc 0, R_MIPS_NONE, v_sf ; GAS: la $25, $__fn_local_v_sf -; IAS: lw $25, %got($$__fn_local_v_sf)($gp) -; IAS: addiu $25, $25, %lo($$__fn_local_v_sf) +; IAS: lw $25, %got($__fn_local_v_sf)($gp) +; IAS: addiu $25, $25, %lo($__fn_local_v_sf) ; ALL: mfc1 $4, $f12 ; ALL: jr $25 ; ALL: .end __fn_stub_v_sf diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll new file mode 100644 index 00000000000000..3e328c6ad9f0ba --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr59074.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64 +; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32 +; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64 +; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32 + +; To verify this doesn't crash due to array out of bound. +define void @pr59074(ptr %0) { +; LE64-LABEL: pr59074: +; LE64: # %bb.0: # %entry +; LE64-NEXT: lwz 6, 0(3) +; LE64-NEXT: li 7, 12 +; LE64-NEXT: ld 4, 16(3) +; LE64-NEXT: ld 5, 24(3) +; LE64-NEXT: addi 6, 6, -12 +; LE64-NEXT: std 4, 16(3) +; LE64-NEXT: std 5, 24(3) +; LE64-NEXT: srd 6, 7, 6 +; LE64-NEXT: li 7, 0 +; LE64-NEXT: std 7, 8(3) +; LE64-NEXT: std 6, 0(3) +; LE64-NEXT: blr +; +; LE32-LABEL: pr59074: +; LE32: # %bb.0: # %entry +; LE32-NEXT: stwu 1, -80(1) +; LE32-NEXT: .cfi_def_cfa_offset 80 +; LE32-NEXT: lwz 4, 0(3) +; LE32-NEXT: xxlxor 0, 0, 0 +; LE32-NEXT: li 5, 4 +; LE32-NEXT: addi 6, 1, 16 +; LE32-NEXT: li 7, 0 +; LE32-NEXT: li 8, 12 +; LE32-NEXT: xxswapd 0, 0 +; LE32-NEXT: addi 4, 4, -12 +; LE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; LE32-NEXT: stxvd2x 0, 6, 5 +; LE32-NEXT: stw 7, 44(1) +; LE32-NEXT: stw 7, 40(1) +; LE32-NEXT: stw 7, 36(1) +; LE32-NEXT: stw 8, 16(1) +; LE32-NEXT: lwzux 5, 9, 6 +; LE32-NEXT: li 6, 7 +; LE32-NEXT: lwz 7, 8(9) +; LE32-NEXT: nand 6, 4, 6 +; LE32-NEXT: lwz 8, 4(9) +; LE32-NEXT: clrlwi 4, 4, 29 +; LE32-NEXT: lwz 9, 12(9) +; LE32-NEXT: clrlwi 6, 6, 27 +; LE32-NEXT: subfic 11, 4, 32 +; LE32-NEXT: srw 5, 5, 4 +; LE32-NEXT: slwi 10, 7, 1 +; LE32-NEXT: srw 7, 7, 4 +; LE32-NEXT: slw 6, 10, 6 +; LE32-NEXT: srw 10, 8, 4 +; LE32-NEXT: slw 8, 8, 11 +; LE32-NEXT: slw 11, 9, 11 +; LE32-NEXT: srw 4, 9, 4 +; LE32-NEXT: or 5, 8, 5 +; LE32-NEXT: or 7, 11, 7 +; LE32-NEXT: or 6, 10, 6 +; LE32-NEXT: stw 4, 12(3) +; LE32-NEXT: stw 7, 8(3) +; LE32-NEXT: stw 5, 0(3) +; LE32-NEXT: stw 6, 4(3) +; LE32-NEXT: addi 1, 1, 80 +; LE32-NEXT: blr +; +; BE64-LABEL: pr59074: +; BE64: # %bb.0: # %entry +; BE64-NEXT: lwz 6, 12(3) +; BE64-NEXT: li 7, 12 +; BE64-NEXT: ld 4, 24(3) +; BE64-NEXT: ld 5, 16(3) +; BE64-NEXT: addi 6, 6, -12 +; BE64-NEXT: std 4, 24(3) +; BE64-NEXT: std 5, 16(3) +; BE64-NEXT: srd 6, 7, 6 +; BE64-NEXT: li 7, 0 +; BE64-NEXT: std 7, 0(3) +; BE64-NEXT: std 6, 8(3) +; BE64-NEXT: blr +; +; BE32-LABEL: pr59074: +; BE32: # %bb.0: # %entry +; BE32-NEXT: lwz 4, 12(3) +; BE32-NEXT: xxlxor 0, 0, 0 +; BE32-NEXT: addi 5, 1, -64 +; BE32-NEXT: li 6, 12 +; BE32-NEXT: li 7, 0 +; BE32-NEXT: addi 8, 1, -48 +; BE32-NEXT: li 10, 7 +; BE32-NEXT: stxvw4x 0, 0, 5 +; BE32-NEXT: addi 4, 4, -12 +; BE32-NEXT: stw 6, -36(1) +; BE32-NEXT: stw 7, -40(1) +; BE32-NEXT: stw 7, -44(1) +; BE32-NEXT: rlwinm 9, 4, 29, 28, 31 +; BE32-NEXT: stw 7, -48(1) +; BE32-NEXT: sub 5, 8, 9 +; BE32-NEXT: nand 6, 4, 10 +; BE32-NEXT: clrlwi 4, 4, 29 +; BE32-NEXT: clrlwi 6, 6, 27 +; BE32-NEXT: lwz 7, 4(5) +; BE32-NEXT: lwz 8, 8(5) +; BE32-NEXT: lwz 9, 0(5) +; BE32-NEXT: lwz 5, 12(5) +; BE32-NEXT: slwi 10, 7, 1 +; BE32-NEXT: srw 11, 8, 4 +; BE32-NEXT: srw 7, 7, 4 +; BE32-NEXT: srw 5, 5, 4 +; BE32-NEXT: slw 6, 10, 6 +; BE32-NEXT: subfic 10, 4, 32 +; BE32-NEXT: srw 4, 9, 4 +; BE32-NEXT: slw 8, 8, 10 +; BE32-NEXT: slw 10, 9, 10 +; BE32-NEXT: or 6, 11, 6 +; BE32-NEXT: or 7, 10, 7 +; BE32-NEXT: or 5, 8, 5 +; BE32-NEXT: stw 4, 0(3) +; BE32-NEXT: stw 6, 8(3) +; BE32-NEXT: stw 5, 12(3) +; BE32-NEXT: stw 7, 4(3) +; BE32-NEXT: blr +entry: + %v1 = load <2 x i128>, <2 x i128>* %0 + %v2 = insertelement <2 x i128> %v1, i128 12, i32 0 + %v3 = sub <2 x i128> %v1, %v2 + %v4 = lshr <2 x i128> %v2, %v3 + store <2 x i128> %v4, <2 x i128>* %0 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/branch-opt.mir b/llvm/test/CodeGen/RISCV/branch-opt.mir new file mode 100644 index 00000000000000..ba3a20f2fbfcd3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/branch-opt.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc %s -mtriple=riscv64 -run-pass=peephole-opt -o - | FileCheck %s + +# Make sure we shouldn't replace the %2 ADDI with the $x10 ADDI since it has a +# physical register destination. + +--- | + define void @foo(i32 signext %0) { + tail call void @bar(i32 1) + %2 = icmp ugt i32 %0, 1 + br i1 %2, label %3, label %4 + + 3: ; preds = %1 + tail call void @bar(i32 3) + ret void + + 4: ; preds = %1 + ret void + } + + declare void @bar(...) + +... +--- +name: foo +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0 (%ir-block.1): + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + ; CHECK-NEXT: $x10 = ADDI $x0, 1 + ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 2 + ; CHECK-NEXT: BLTU [[COPY]], killed [[ADDI]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.3): + ; CHECK-NEXT: $x10 = ADDI $x0, 3 + ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.4): + ; CHECK-NEXT: PseudoRET + bb.0 (%ir-block.1): + successors: %bb.1, %bb.2 + liveins: $x10 + + %0:gpr = COPY $x10 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = ADDI $x0, 1 + PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + %2:gpr = ADDI $x0, 2 + BLTU %0, killed %2, %bb.2 + PseudoBR %bb.1 + + bb.1 (%ir-block.3): + $x10 = ADDI $x0, 3 + PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10 + + bb.2 (%ir-block.4): + PseudoRET + +... diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll index 8e53782b5dcd78..101cb5aeeb0940 100644 --- a/llvm/test/CodeGen/RISCV/condops.ll +++ b/llvm/test/CodeGen/RISCV/condops.ll @@ -3719,3 +3719,54 @@ entry: %cond = select i1 %tobool.not, i64 0, i64 %x ret i64 %cond } + +; Test that we don't crash on types larger than 64 bits. +define i64 @single_bit3(i80 %x, i64 %y) { +; RV32I-LABEL: single_bit3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a0, 8(a0) +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: srai a3, a0, 31 +; RV32I-NEXT: and a0, a3, a1 +; RV32I-NEXT: and a1, a3, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: single_bit3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: srai a0, a1, 63 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64XVENTANACONDOPS-LABEL: single_bit3: +; RV64XVENTANACONDOPS: # %bb.0: # %entry +; RV64XVENTANACONDOPS-NEXT: andi a1, a1, 1 +; RV64XVENTANACONDOPS-NEXT: vt.maskc a0, a2, a1 +; RV64XVENTANACONDOPS-NEXT: ret +; +; RV64XTHEADCONDMOV-LABEL: single_bit3: +; RV64XTHEADCONDMOV: # %bb.0: # %entry +; RV64XTHEADCONDMOV-NEXT: slli a1, a1, 63 +; RV64XTHEADCONDMOV-NEXT: srai a0, a1, 63 +; RV64XTHEADCONDMOV-NEXT: and a0, a0, a2 +; RV64XTHEADCONDMOV-NEXT: ret +; +; RV32ZICOND-LABEL: single_bit3: +; RV32ZICOND: # %bb.0: # %entry +; RV32ZICOND-NEXT: lw a0, 8(a0) +; RV32ZICOND-NEXT: andi a3, a0, 1 +; RV32ZICOND-NEXT: czero.eqz a0, a1, a3 +; RV32ZICOND-NEXT: czero.eqz a1, a2, a3 +; RV32ZICOND-NEXT: ret +; +; RV64ZICOND-LABEL: single_bit3: +; RV64ZICOND: # %bb.0: # %entry +; RV64ZICOND-NEXT: andi a1, a1, 1 +; RV64ZICOND-NEXT: czero.eqz a0, a2, a1 +; RV64ZICOND-NEXT: ret +entry: + %and = and i80 %x, 18446744073709551616 ; 1 << 64 + %tobool.not = icmp eq i80 %and, 0 + %cond = select i1 %tobool.not, i64 0, i64 %y + ret i64 %cond +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index dab530751ef96b..799aebcaa63026 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -238,26 +238,39 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) { define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-LABEL: interleave_v32f32: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v0, v8, 16 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 -; V128-NEXT: lui a1, %hi(.LCPI10_0) -; V128-NEXT: addi a1, a1, %lo(.LCPI10_0) -; V128-NEXT: li a2, 32 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; V128-NEXT: vle16.v v12, (a1) -; V128-NEXT: lui a1, 699051 -; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: vmv.s.x v0, a1 +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI10_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI10_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI10_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload ; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 ; V128-NEXT: vwmaccu.vx v0, a0, v16 ; V128-NEXT: vmv8r.v v8, v0 ; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v32f32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 9e21cc9e3d624a..e1bd16649eede7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -188,30 +188,24 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-LABEL: interleave_v4i32_offset_1: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; V128-NEXT: vwaddu.vv v10, v8, v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v10, a0, v8 ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; V128-NEXT: vid.v v8 -; V128-NEXT: vsrl.vi v8, v8, 1 +; V128-NEXT: vid.v v10 +; V128-NEXT: vsrl.vi v11, v10, 1 +; V128-NEXT: vrgather.vv v10, v8, v11 ; V128-NEXT: vmv.v.i v0, 10 -; V128-NEXT: vadd.vi v8, v8, 1 +; V128-NEXT: vadd.vi v8, v11, 1 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v4i32_offset_1: ; V512: # %bb.0: -; V512-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; V512-NEXT: vwaddu.vv v10, v8, v8 -; V512-NEXT: li a0, -1 -; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu -; V512-NEXT: vid.v v8 -; V512-NEXT: vsrl.vi v8, v8, 1 +; V512-NEXT: vid.v v10 +; V512-NEXT: vsrl.vi v11, v10, 1 +; V512-NEXT: vrgather.vv v10, v8, v11 ; V512-NEXT: vmv.v.i v0, 10 -; V512-NEXT: vadd.vi v8, v8, 1 +; V512-NEXT: vadd.vi v8, v11, 1 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret @@ -403,26 +397,39 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) { define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-LABEL: interleave_v32i32: ; V128: # %bb.0: -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v0, v8, 16 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, v8 -; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 -; V128-NEXT: lui a1, %hi(.LCPI17_0) -; V128-NEXT: addi a1, a1, %lo(.LCPI17_0) -; V128-NEXT: li a2, 32 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; V128-NEXT: vle16.v v12, (a1) -; V128-NEXT: lui a1, 699051 -; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: vmv.s.x v0, a1 +; V128-NEXT: addi sp, sp, -16 +; V128-NEXT: .cfi_def_cfa_offset 16 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: sub sp, sp, a0 +; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; V128-NEXT: lui a0, %hi(.LCPI17_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_0) +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; V128-NEXT: vle16.v v4, (a0) +; V128-NEXT: lui a0, %hi(.LCPI17_1) +; V128-NEXT: addi a0, a0, %lo(.LCPI17_1) +; V128-NEXT: vle16.v v24, (a0) +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs4r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: lui a0, 699051 +; V128-NEXT: addi a0, a0, -1366 +; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vrgatherei16.vv v24, v8, v4 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vl4r.v v12, (a0) # Unknown-size Folded Reload ; V128-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v8, v16 +; V128-NEXT: li a0, -1 ; V128-NEXT: vwmaccu.vx v0, a0, v16 ; V128-NEXT: vmv8r.v v8, v0 ; V128-NEXT: vmv8r.v v16, v24 +; V128-NEXT: csrr a0, vlenb +; V128-NEXT: slli a0, a0, 2 +; V128-NEXT: add sp, sp, a0 +; V128-NEXT: addi sp, sp, 16 ; V128-NEXT: ret ; ; V512-LABEL: interleave_v32i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index a26a87a1f3c139..a56a81f5f793bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -612,11 +612,13 @@ define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: concat_4xi8_start_undef_at_start: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 224 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -4 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -626,11 +628,13 @@ define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_start_into_end_non_contiguous: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 144 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -4 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -671,11 +675,13 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 195 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -686,12 +692,14 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v10, v10, -1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 2 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res @@ -702,13 +710,16 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v11, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> ret <8 x i8> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index f889041647b235..eeb8e517d01d2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -8,51 +8,23 @@ ; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { -; RV32-LABEL: load_factor2_v3: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vle32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vwaddu.vv v8, v10, v9 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: vwmaccu.vx v8, a0, v9 -; RV32-NEXT: vmv.v.i v0, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v9, v9, v9 -; RV32-NEXT: vadd.vi v11, v9, 1 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: load_factor2_v3: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vle32.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: vrgather.vv v9, v10, v8 -; RV64-NEXT: vmv.v.i v0, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v10, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v11, v10, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v8, v10, v11 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vwmaccu.vx v8, a0, v11 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: load_factor2_v3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v9, v8, v8 +; CHECK-NEXT: vrgather.vv v8, v10, v9 +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v10, 4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t +; CHECK-NEXT: vadd.vi v11, v9, 1 +; CHECK-NEXT: vrgather.vv v9, v10, v11 +; CHECK-NEXT: vrgather.vi v9, v12, 1, v0.t +; CHECK-NEXT: ret %interleaved.vec = load <6 x i32>, ptr %ptr %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> @@ -159,142 +131,163 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 58 +; RV32-NEXT: li a3, 62 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb -; RV32-NEXT: addi a3, a1, 256 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 62 * vlenb +; RV32-NEXT: addi a3, a1, 128 +; RV32-NEXT: addi a4, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v8, (a3) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 25 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a1, 128 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vslideup.vi v16, v8, 4 +; RV32-NEXT: vle32.v v16, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 12 +; RV32-NEXT: li a5, 29 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vid.v v20 -; RV32-NEXT: vadd.vi v4, v20, -10 -; RV32-NEXT: vmv.v.v v2, v20 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vid.v v10 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 4 +; RV32-NEXT: slli a5, a4, 3 ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs2r.v v20, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vs2r.v v10, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vadd.vi v8, v10, -4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 13 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 21 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vadd.vi v8, v10, -10 ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v1, a4 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 5 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: slli a4, a4, 3 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 45 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v1, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vrgatherei16.vv v16, v8, v4, v0.t +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 21 +; RV32-NEXT: li a5, 25 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, %hi(.LCPI6_0) ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: lui a5, %hi(.LCPI6_1) +; RV32-NEXT: addi a5, a5, %lo(.LCPI6_1) +; RV32-NEXT: lui a6, 1 ; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v8, (a5) ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 2 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, %hi(.LCPI6_1) -; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1) -; RV32-NEXT: lui a5, 1 -; RV32-NEXT: vle16.v v8, (a4) -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 49 +; RV32-NEXT: li a4, 37 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, a5, -64 +; RV32-NEXT: addi a1, a6, -64 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4 -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vi v8, v10, -2 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v8, 2 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vadd.vi v8, v2, -8 +; RV32-NEXT: vadd.vi v8, v10, -8 +; RV32-NEXT: vmv2r.v v30, v10 ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v28 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v20, v12 +; RV32-NEXT: vmv.v.v v24, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu @@ -308,165 +301,166 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v0, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8 +; RV32-NEXT: vrgatherei16.vv v4, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vadd.vi v8, v30, -6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v8, -6 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV32-NEXT: vmv1r.v v0, v28 +; RV32-NEXT: vmv1r.v v2, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t +; RV32-NEXT: vrgatherei16.vv v4, v16, v8, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v28, (a3) +; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: vle16.v v8, (a3) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v20, a1 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v1, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 -; RV32-NEXT: vmv1r.v v0, v20 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v8, v20 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v4, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v8, -4 -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v24, v12 +; RV32-NEXT: vmv.v.v v4, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_8) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_9) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9) ; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v28, (a3) +; RV32-NEXT: vle16.v v20, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v16 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) @@ -474,20 +468,25 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a1, 15 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v20, v16, 6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v20, v16, v10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -502,13 +501,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: li a1, 1008 ; RV32-NEXT: vmv.s.x v28, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -516,7 +515,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: vmv1r.v v0, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -529,19 +528,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -554,33 +553,33 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vle16.v v24, (a1) ; RV32-NEXT: vle16.v v8, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 5 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 37 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 25 +; RV32-NEXT: li a2, 29 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 41 +; RV32-NEXT: li a2, 53 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 5 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload @@ -594,35 +593,37 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 4 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 21 +; RV32-NEXT: li a2, 25 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 58 +; RV32-NEXT: li a1, 62 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index df41ac10f80d36..890707c6337fad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -14638,5 +14638,453 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) ret <8 x i16> %v } + +; v32i64 is not a legal type, so make sure we don't try to combine the mgather +; to a vlse intrinsic until it is legalized and split. +define <32 x i64> @mgather_strided_split(ptr %base) { +; RV32V-LABEL: mgather_strided_split: +; RV32V: # %bb.0: +; RV32V-NEXT: li a1, 16 +; RV32V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32V-NEXT: vlse64.v v8, (a0), a1 +; RV32V-NEXT: addi a0, a0, 256 +; RV32V-NEXT: vlse64.v v16, (a0), a1 +; RV32V-NEXT: ret +; +; RV64V-LABEL: mgather_strided_split: +; RV64V: # %bb.0: +; RV64V-NEXT: li a1, 16 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vlse64.v v8, (a0), a1 +; RV64V-NEXT: addi a0, a0, 256 +; RV64V-NEXT: vlse64.v v16, (a0), a1 +; RV64V-NEXT: ret +; +; RV32ZVE32F-LABEL: mgather_strided_split: +; RV32ZVE32F: # %bb.0: +; RV32ZVE32F-NEXT: addi sp, sp, -512 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 512 +; RV32ZVE32F-NEXT: sw ra, 508(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s0, 504(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s2, 500(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s3, 496(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s4, 492(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s5, 488(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s6, 484(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s7, 480(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s8, 476(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s9, 472(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s10, 468(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s11, 464(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset ra, -4 +; RV32ZVE32F-NEXT: .cfi_offset s0, -8 +; RV32ZVE32F-NEXT: .cfi_offset s2, -12 +; RV32ZVE32F-NEXT: .cfi_offset s3, -16 +; RV32ZVE32F-NEXT: .cfi_offset s4, -20 +; RV32ZVE32F-NEXT: .cfi_offset s5, -24 +; RV32ZVE32F-NEXT: .cfi_offset s6, -28 +; RV32ZVE32F-NEXT: .cfi_offset s7, -32 +; RV32ZVE32F-NEXT: .cfi_offset s8, -36 +; RV32ZVE32F-NEXT: .cfi_offset s9, -40 +; RV32ZVE32F-NEXT: .cfi_offset s10, -44 +; RV32ZVE32F-NEXT: .cfi_offset s11, -48 +; RV32ZVE32F-NEXT: addi s0, sp, 512 +; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 +; RV32ZVE32F-NEXT: andi sp, sp, -128 +; RV32ZVE32F-NEXT: li a2, 32 +; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32ZVE32F-NEXT: vid.v v8 +; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4 +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 216(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 236(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 240(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 232(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 224(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 204(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 200(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v16 +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 196(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 192(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: addi a1, sp, 256 +; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32ZVE32F-NEXT: vse32.v v8, (a1) +; RV32ZVE32F-NEXT: lw a1, 288(sp) +; RV32ZVE32F-NEXT: lw a2, 292(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 188(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 296(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 180(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 300(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 304(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 164(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 160(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 308(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 156(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 312(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 148(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 144(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 316(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 140(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 320(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 132(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 324(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 328(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a2) +; RV32ZVE32F-NEXT: sw a3, 116(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 4(a2) +; RV32ZVE32F-NEXT: sw a2, 112(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 332(sp) +; RV32ZVE32F-NEXT: lw a3, 0(a1) +; RV32ZVE32F-NEXT: sw a3, 104(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw ra, 4(a1) +; RV32ZVE32F-NEXT: lw a1, 336(sp) +; RV32ZVE32F-NEXT: lw s10, 0(a2) +; RV32ZVE32F-NEXT: lw s8, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 340(sp) +; RV32ZVE32F-NEXT: lw s6, 0(a1) +; RV32ZVE32F-NEXT: lw s4, 4(a1) +; RV32ZVE32F-NEXT: lw a4, 344(sp) +; RV32ZVE32F-NEXT: lw s2, 0(a2) +; RV32ZVE32F-NEXT: lw t5, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 348(sp) +; RV32ZVE32F-NEXT: lw t3, 0(a4) +; RV32ZVE32F-NEXT: lw t2, 4(a4) +; RV32ZVE32F-NEXT: lw a4, 352(sp) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw a7, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 356(sp) +; RV32ZVE32F-NEXT: lw a6, 0(a4) +; RV32ZVE32F-NEXT: lw a5, 4(a4) +; RV32ZVE32F-NEXT: lw a4, 360(sp) +; RV32ZVE32F-NEXT: lw a1, 0(a2) +; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a1, 4(a2) +; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: lw a2, 364(sp) +; RV32ZVE32F-NEXT: lw s11, 0(a4) +; RV32ZVE32F-NEXT: lw s9, 4(a4) +; RV32ZVE32F-NEXT: lw a1, 368(sp) +; RV32ZVE32F-NEXT: lw s7, 0(a2) +; RV32ZVE32F-NEXT: lw s5, 4(a2) +; RV32ZVE32F-NEXT: lw a3, 372(sp) +; RV32ZVE32F-NEXT: lw s3, 0(a1) +; RV32ZVE32F-NEXT: lw t6, 4(a1) +; RV32ZVE32F-NEXT: lw a2, 376(sp) +; RV32ZVE32F-NEXT: lw t4, 0(a3) +; RV32ZVE32F-NEXT: lw a1, 380(sp) +; RV32ZVE32F-NEXT: lw t1, 4(a3) +; RV32ZVE32F-NEXT: lw a4, 0(a2) +; RV32ZVE32F-NEXT: lw a3, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 0(a1) +; RV32ZVE32F-NEXT: lw a1, 4(a1) +; RV32ZVE32F-NEXT: sw a5, 196(a0) +; RV32ZVE32F-NEXT: sw a6, 192(a0) +; RV32ZVE32F-NEXT: sw a7, 188(a0) +; RV32ZVE32F-NEXT: sw t0, 184(a0) +; RV32ZVE32F-NEXT: sw t2, 180(a0) +; RV32ZVE32F-NEXT: sw t3, 176(a0) +; RV32ZVE32F-NEXT: sw t5, 172(a0) +; RV32ZVE32F-NEXT: sw s2, 168(a0) +; RV32ZVE32F-NEXT: sw s4, 164(a0) +; RV32ZVE32F-NEXT: sw s6, 160(a0) +; RV32ZVE32F-NEXT: sw s8, 156(a0) +; RV32ZVE32F-NEXT: sw s10, 152(a0) +; RV32ZVE32F-NEXT: sw ra, 148(a0) +; RV32ZVE32F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 144(a0) +; RV32ZVE32F-NEXT: lw a5, 112(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 140(a0) +; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 136(a0) +; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 132(a0) +; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 128(a0) +; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 124(a0) +; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 120(a0) +; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 116(a0) +; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 112(a0) +; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 108(a0) +; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 104(a0) +; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 100(a0) +; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 96(a0) +; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 92(a0) +; RV32ZVE32F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 88(a0) +; RV32ZVE32F-NEXT: lw a5, 168(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 84(a0) +; RV32ZVE32F-NEXT: lw a5, 172(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 80(a0) +; RV32ZVE32F-NEXT: lw a5, 176(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 76(a0) +; RV32ZVE32F-NEXT: lw a5, 180(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 72(a0) +; RV32ZVE32F-NEXT: lw a5, 184(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 68(a0) +; RV32ZVE32F-NEXT: lw a5, 188(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 64(a0) +; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a5, 0(a0) +; RV32ZVE32F-NEXT: sw a1, 252(a0) +; RV32ZVE32F-NEXT: sw a2, 248(a0) +; RV32ZVE32F-NEXT: sw a3, 244(a0) +; RV32ZVE32F-NEXT: sw a4, 240(a0) +; RV32ZVE32F-NEXT: sw t1, 236(a0) +; RV32ZVE32F-NEXT: sw t4, 232(a0) +; RV32ZVE32F-NEXT: sw t6, 228(a0) +; RV32ZVE32F-NEXT: sw s3, 224(a0) +; RV32ZVE32F-NEXT: sw s5, 220(a0) +; RV32ZVE32F-NEXT: sw s7, 216(a0) +; RV32ZVE32F-NEXT: sw s9, 212(a0) +; RV32ZVE32F-NEXT: sw s11, 208(a0) +; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 204(a0) +; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 200(a0) +; RV32ZVE32F-NEXT: lw a1, 220(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 28(a0) +; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 24(a0) +; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 20(a0) +; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 16(a0) +; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 12(a0) +; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 8(a0) +; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 60(a0) +; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 56(a0) +; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 52(a0) +; RV32ZVE32F-NEXT: lw a1, 204(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 48(a0) +; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 44(a0) +; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 40(a0) +; RV32ZVE32F-NEXT: lw a1, 232(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 36(a0) +; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: sw a1, 32(a0) +; RV32ZVE32F-NEXT: addi sp, s0, -512 +; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s2, 500(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s3, 496(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s4, 492(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s5, 488(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s6, 484(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s7, 480(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s8, 476(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s9, 472(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s10, 468(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s11, 464(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: addi sp, sp, 512 +; RV32ZVE32F-NEXT: ret +; +; RV64ZVE32F-LABEL: mgather_strided_split: +; RV64ZVE32F: # %bb.0: +; RV64ZVE32F-NEXT: addi sp, sp, -144 +; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 144 +; RV64ZVE32F-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: .cfi_offset ra, -8 +; RV64ZVE32F-NEXT: .cfi_offset s0, -16 +; RV64ZVE32F-NEXT: .cfi_offset s1, -24 +; RV64ZVE32F-NEXT: .cfi_offset s2, -32 +; RV64ZVE32F-NEXT: .cfi_offset s3, -40 +; RV64ZVE32F-NEXT: .cfi_offset s4, -48 +; RV64ZVE32F-NEXT: .cfi_offset s5, -56 +; RV64ZVE32F-NEXT: .cfi_offset s6, -64 +; RV64ZVE32F-NEXT: .cfi_offset s7, -72 +; RV64ZVE32F-NEXT: .cfi_offset s8, -80 +; RV64ZVE32F-NEXT: .cfi_offset s9, -88 +; RV64ZVE32F-NEXT: .cfi_offset s10, -96 +; RV64ZVE32F-NEXT: .cfi_offset s11, -104 +; RV64ZVE32F-NEXT: ld a2, 0(a1) +; RV64ZVE32F-NEXT: sd a2, 32(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 16(a1) +; RV64ZVE32F-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 32(a1) +; RV64ZVE32F-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 48(a1) +; RV64ZVE32F-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a2, 64(a1) +; RV64ZVE32F-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64ZVE32F-NEXT: ld a7, 80(a1) +; RV64ZVE32F-NEXT: ld t0, 96(a1) +; RV64ZVE32F-NEXT: ld t1, 112(a1) +; RV64ZVE32F-NEXT: ld t2, 128(a1) +; RV64ZVE32F-NEXT: ld t3, 144(a1) +; RV64ZVE32F-NEXT: ld t4, 160(a1) +; RV64ZVE32F-NEXT: ld t5, 176(a1) +; RV64ZVE32F-NEXT: ld t6, 192(a1) +; RV64ZVE32F-NEXT: ld s0, 208(a1) +; RV64ZVE32F-NEXT: ld s1, 224(a1) +; RV64ZVE32F-NEXT: ld s2, 240(a1) +; RV64ZVE32F-NEXT: ld s3, 256(a1) +; RV64ZVE32F-NEXT: ld s4, 272(a1) +; RV64ZVE32F-NEXT: ld s5, 288(a1) +; RV64ZVE32F-NEXT: ld s6, 304(a1) +; RV64ZVE32F-NEXT: ld s7, 320(a1) +; RV64ZVE32F-NEXT: ld s8, 336(a1) +; RV64ZVE32F-NEXT: ld s9, 352(a1) +; RV64ZVE32F-NEXT: ld s10, 368(a1) +; RV64ZVE32F-NEXT: ld s11, 384(a1) +; RV64ZVE32F-NEXT: ld ra, 400(a1) +; RV64ZVE32F-NEXT: ld a6, 416(a1) +; RV64ZVE32F-NEXT: ld a5, 432(a1) +; RV64ZVE32F-NEXT: ld a2, 496(a1) +; RV64ZVE32F-NEXT: ld a3, 480(a1) +; RV64ZVE32F-NEXT: ld a4, 464(a1) +; RV64ZVE32F-NEXT: ld a1, 448(a1) +; RV64ZVE32F-NEXT: sd a2, 248(a0) +; RV64ZVE32F-NEXT: sd a3, 240(a0) +; RV64ZVE32F-NEXT: sd a4, 232(a0) +; RV64ZVE32F-NEXT: sd a1, 224(a0) +; RV64ZVE32F-NEXT: sd a5, 216(a0) +; RV64ZVE32F-NEXT: sd a6, 208(a0) +; RV64ZVE32F-NEXT: sd ra, 200(a0) +; RV64ZVE32F-NEXT: sd s11, 192(a0) +; RV64ZVE32F-NEXT: sd s10, 184(a0) +; RV64ZVE32F-NEXT: sd s9, 176(a0) +; RV64ZVE32F-NEXT: sd s8, 168(a0) +; RV64ZVE32F-NEXT: sd s7, 160(a0) +; RV64ZVE32F-NEXT: sd s6, 152(a0) +; RV64ZVE32F-NEXT: sd s5, 144(a0) +; RV64ZVE32F-NEXT: sd s4, 136(a0) +; RV64ZVE32F-NEXT: sd s3, 128(a0) +; RV64ZVE32F-NEXT: sd s2, 120(a0) +; RV64ZVE32F-NEXT: sd s1, 112(a0) +; RV64ZVE32F-NEXT: sd s0, 104(a0) +; RV64ZVE32F-NEXT: sd t6, 96(a0) +; RV64ZVE32F-NEXT: sd t5, 88(a0) +; RV64ZVE32F-NEXT: sd t4, 80(a0) +; RV64ZVE32F-NEXT: sd t3, 72(a0) +; RV64ZVE32F-NEXT: sd t2, 64(a0) +; RV64ZVE32F-NEXT: sd t1, 56(a0) +; RV64ZVE32F-NEXT: sd t0, 48(a0) +; RV64ZVE32F-NEXT: sd a7, 40(a0) +; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 32(a0) +; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 24(a0) +; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 16(a0) +; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: sd a1, 0(a0) +; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; RV64ZVE32F-NEXT: addi sp, sp, 144 +; RV64ZVE32F-NEXT: ret + %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> + %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> shufflevector(<32 x i1> insertelement(<32 x i1> poison, i1 true, i32 0), <32 x i1> poison, <32 x i32> zeroinitializer), <32 x i64> poison) + ret <32 x i64> %x +} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index a34fa9502d93b3..d0777962a75651 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -8,11 +8,13 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn1.v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 @@ -22,11 +24,13 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn2.v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 @@ -36,14 +40,16 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn1.v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v10, v10, -1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -53,14 +59,16 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn2.v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> ret <16 x i8> %tmp0 @@ -70,10 +78,12 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn1.v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ret <4 x i16> %tmp0 @@ -83,10 +93,12 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn2.v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> ret <4 x i16> %tmp0 @@ -96,11 +108,13 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn1.v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ret <8 x i16> %tmp0 @@ -110,11 +124,13 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> ret <8 x i16> %tmp0 @@ -147,10 +163,12 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn1.v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ret <4 x i32> %tmp0 @@ -160,10 +178,12 @@ define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn2.v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> ret <4 x i32> %tmp0 @@ -219,10 +239,12 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn1.v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ret <4 x float> %tmp0 @@ -232,10 +254,12 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn2.v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> ret <4 x float> %tmp0 @@ -268,10 +292,12 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn1.v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ret <4 x half> %tmp0 @@ -281,10 +307,12 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn2.v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: vmv.v.i v0, 10 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> ret <4 x half> %tmp0 @@ -294,11 +322,13 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn1.v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v10, v10, -1 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0 @@ -308,11 +338,13 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> ret <8 x half> %tmp0 diff --git a/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll new file mode 100644 index 00000000000000..3587ecb7f3c94a --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll @@ -0,0 +1,25 @@ +;; Test reserving argument registers. +; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0 +; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0 +; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0 +; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0 + +; CHECK-RESERVED-O0: error: +; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +; CHECK-RESERVED-I0: error: +; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +define void @call_function() { + call void @foo() + ret void +} +declare void @foo() + +; CHECK-RESERVED-O0: error: +; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +; CHECK-RESERVED-I0: error: +; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved. +define void @call_function_with_arg(i8 %in) { + call void @bar(i8 %in) + ret void +} +declare void @bar(i8) diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-named.ll b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll new file mode 100644 index 00000000000000..91808be156c559 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l0 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L0 + +;; Ensure explicit register references are catched as well. + +; CHECK-RESERVED-L0: %l0 +define void @set_reg(i32 zeroext %x) { +entry: + tail call void @llvm.write_register.i32(metadata !0, i32 %x) + ret void +} + +declare void @llvm.write_register.i32(metadata, i32) +!0 = !{!"l0"} diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll new file mode 100644 index 00000000000000..53ca045f100443 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll @@ -0,0 +1,14 @@ +; RUN: not --crash llc -mtriple=sparc64-linux-gnu -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-L0 + +;; Ensure explicit register references for non-reserved registers +;; are caught properly. + +; CHECK-RESERVED-L0: LLVM ERROR: Invalid register name global variable +define void @set_reg(i32 zeroext %x) { +entry: + tail call void @llvm.write_register.i32(metadata !0, i32 %x) + ret void +} + +declare void @llvm.write_register.i32(metadata, i32) +!0 = !{!"l0"} diff --git a/llvm/test/CodeGen/SPARC/reserved-regs.ll b/llvm/test/CodeGen/SPARC/reserved-regs.ll index ec6290586eeef2..7dea1f31538b84 100644 --- a/llvm/test/CodeGen/SPARC/reserved-regs.ll +++ b/llvm/test/CodeGen/SPARC/reserved-regs.ll @@ -1,5 +1,14 @@ ; RUN: llc -march=sparc -verify-machineinstrs < %s | FileCheck %s +;; Test reserve-* options. +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-o1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-O1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L1 +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-I1 + +;; Test multiple reserve-* options together. +; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -mattr=+reserve-o1 -mattr=+reserve-l1 -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1,CHECK-RESERVED-O1,CHECK-RESERVED-L1,CHECK-RESERVED-I1 + @g = common global [32 x i32] zeroinitializer, align 16 @h = common global [16 x i64] zeroinitializer, align 16 @@ -16,6 +25,10 @@ ; CHECK-NOT: %o6 ; CHECK-NOT: %i6 ; CHECK-NOT: %i7 +; CHECK-RESERVED-G1-NOT: %g1 +; CHECK-RESERVED-O1-NOT: %o1 +; CHECK-RESERVED-L1-NOT: %l1 +; CHECK-RESERVED-I1-NOT: %i1 ; CHECK: ret define void @use_all_i32_regs() { entry: @@ -100,6 +113,10 @@ entry: ; CHECK-NOT: %o7 ; CHECK-NOT: %i6 ; CHECK-NOT: %i7 +; CHECK-RESERVED-G1-NOT: %g1 +; CHECK-RESERVED-O1-NOT: %o1 +; CHECK-RESERVED-L1-NOT: %l1 +; CHECK-RESERVED-I1-NOT: %i1 ; CHECK: ret define void @use_all_i64_regs() { entry: diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index 7f8115dc1ce389..b5f3e789918813 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1282,3 +1282,35 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { %tmp8 = or i32 %tmp7, %tmp30 ret i32 %tmp8 } + +define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { +; CHECK-LABEL: pr80911_vector_load_multiuse: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movzwl (%edx), %eax +; CHECK-NEXT: movl $0, (%ecx) +; CHECK-NEXT: movl %esi, (%edx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; +; CHECK64-LABEL: pr80911_vector_load_multiuse: +; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %ecx +; CHECK64-NEXT: movzwl (%rdi), %eax +; CHECK64-NEXT: movl $0, (%rsi) +; CHECK64-NEXT: movl %ecx, (%rdi) +; CHECK64-NEXT: retq + %load = load <4 x i8>, ptr %ptr, align 16 + store i32 0, ptr %clobber + store <4 x i8> %load, ptr %ptr, align 16 + %e1 = extractelement <4 x i8> %load, i64 1 + %e1.ext = zext i8 %e1 to i32 + %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8 + %e0 = extractelement <4 x i8> %load, i64 0 + %e0.ext = zext i8 %e0 to i32 + %res = or i32 %e1.ext.shift, %e0.ext + ret i32 %res +} diff --git a/llvm/test/MC/Mips/macro-la-pic.s b/llvm/test/MC/Mips/macro-la-pic.s index 2303f34c35bcfe..1875952d80c4e7 100644 --- a/llvm/test/MC/Mips/macro-la-pic.s +++ b/llvm/test/MC/Mips/macro-la-pic.s @@ -255,3 +255,25 @@ la $25, 2f # XN32: lw $25, %got_disp(.Ltmp1)($gp) # encoding: [0x8f,0x99,A,A] # XN32: # fixup A - offset: 0, value: %got_disp(.Ltmp1), kind: fixup_Mips_GOT_DISP 2: + +la $2,.Lstr +# O32: lw $2, %got(.Lstr)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got(.Lstr), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo(.Lstr) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo(.Lstr), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp(.Lstr)($gp) # encoding: [0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp(.Lstr), kind: fixup_Mips_GOT_DISP + +la $2,$str2 +# O32: lw $2, %got($str2)($gp) # encoding: [0x8f,0x82,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got($str2), kind: fixup_Mips_GOT +# O32-NEXT: addiu $2, $2, %lo($str2) # encoding: [0x24,0x42,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo($str2), kind: fixup_Mips_LO16 + +# N32: lw $2, %got_disp($str2)($gp) # encoding: [0x8f,0x82,A,A] +# N32-NEXT: # fixup A - offset: 0, value: %got_disp($str2), kind: fixup_Mips_GOT_DISP + +.rodata +.Lstr: .4byte 0 +$str2: .4byte 0 diff --git a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll index 0ba114117ceec6..ee36f949529d4f 100644 --- a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll +++ b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll @@ -142,7 +142,7 @@ define double @ret_fpext_f32_to_f64_nosub(float nofpclass(sub) %arg0) { } define double @ret_fpext_f32_to_f64_nonorm(float nofpclass(norm) %arg0) { -; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_f32_to_f64_nonorm +; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_f32_to_f64_nonorm ; CHECK-SAME: (float nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXT:%.*]] = fpext float [[ARG0]] to double ; CHECK-NEXT: ret double [[EXT]] @@ -482,7 +482,37 @@ define double @ret_fpext_bf16_f64_nosub(bfloat nofpclass(sub) %arg0) { } define double @ret_fpext_bf16_f64_nonorm(bfloat nofpclass(norm) %arg0) { -; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm +; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_bf16_f64_nonorm +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_psub(bfloat nofpclass(norm psub) %arg0) { +; CHECK-LABEL: define nofpclass(sub pnorm) double @ret_fpext_bf16_f64_nonorm_psub +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_nsub(bfloat nofpclass(norm nsub) %arg0) { +; CHECK-LABEL: define nofpclass(sub nnorm) double @ret_fpext_bf16_f64_nonorm_nsub +; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double +; CHECK-NEXT: ret double [[EXT]] +; + %ext = fpext bfloat %arg0 to double + ret double %ext +} + +define double @ret_fpext_bf16_f64_nonorm_sub(bfloat nofpclass(norm sub) %arg0) { +; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm_sub ; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXT:%.*]] = fpext bfloat [[ARG0]] to double ; CHECK-NEXT: ret double [[EXT]] diff --git a/llvm/test/Transforms/IndVarSimplify/pr55925.ll b/llvm/test/Transforms/IndVarSimplify/pr55925.ll index 420fc209949d4f..312a8295ccdc9f 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr55925.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr55925.ll @@ -18,9 +18,9 @@ define void @test(ptr %p) personality ptr undef { ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]]) ; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 } @@ -64,8 +64,8 @@ define void @test_critedge(i1 %c, ptr %p) personality ptr undef { ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[LOOP_INVOKE]] ], [ 0, [[LOOP_OTHER]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[PHI]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 } diff --git a/llvm/test/Transforms/IndVarSimplify/pr79861.ll b/llvm/test/Transforms/IndVarSimplify/pr79861.ll new file mode 100644 index 00000000000000..66250944961397 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr79861.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +target datalayout = "n64" + +declare void @use(i64) + +define void @or_disjoint() { +; CHECK-LABEL: define void @or_disjoint() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[IV_DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 false, i64 [[OR]], i64 [[ADD]] +; CHECK-NEXT: call void @use(i64 [[SEL]]) +; CHECK-NEXT: [[IV_DEC]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_DEC]], 0 +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 2, %entry ], [ %iv.dec, %loop ] + %or = or disjoint i64 %iv, 1 + %add = add nsw i64 %iv, 1 + %sel = select i1 false, i64 %or, i64 %add + call void @use(i64 %sel) + + %iv.dec = add nsw i64 %iv, -1 + %exit.cond = icmp eq i64 %iv.dec, 0 + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + +define void @add_nowrap_flags(i64 %n) { +; CHECK-LABEL: define void @add_nowrap_flags( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[IV]], 123 +; CHECK-NEXT: call void @use(i64 [[ADD1]]) +; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_INC]], [[N]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ] + %add1 = add nuw nsw i64 %iv, 123 + %add2 = add i64 %iv, 123 + %sel = select i1 false, i64 %add1, i64 %add2 + call void @use(i64 %sel) + + %iv.inc = add i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.inc, %n + br i1 %exit.cond, label %exit, label %loop + +exit: + ret void +} + + +define void @expander_or_disjoint(i64 %n) { +; CHECK-LABEL: define void @expander_or_disjoint( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[N]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_INC]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OR]] +; CHECK-NEXT: call void @use(i64 [[ADD]]) +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_INC]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %or = or disjoint i64 %n, 1 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ] + %iv.inc = add i64 %iv, 1 + %add = add i64 %iv, %or + call void @use(i64 %add) + %cmp = icmp ult i64 %iv, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 642c3eb2a0e41b..c90b6c9fb29592 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -116,6 +116,7 @@ define void @test_overaligned_vec(i8 %B) { ; CHECK-LABEL: @test_overaligned_vec( ; CHECK-NEXT: store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1 ; CHECK-NEXT: ret void +; %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1 store i8 %B, ptr %A ret void @@ -1473,6 +1474,16 @@ define ptr @gep_sdiv(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv( +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv exact i64 %off, 7 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define <2 x ptr> @gep_sdiv_vec(<2 x ptr> %p, <2 x i64> %off) { ; CHECK-LABEL: @gep_sdiv_vec( ; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, <2 x ptr> [[P:%.*]], <2 x i64> [[OFF:%.*]] @@ -1503,6 +1514,16 @@ define ptr @gep_ashr(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_lshr(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_lshr( +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = lshr exact i64 %off, 2 + %ptr = getelementptr i32, ptr %p, i64 %index + ret ptr %ptr +} + ; Negative tests define ptr @gep_i8(ptr %p, i64 %off) { @@ -1525,6 +1546,17 @@ define ptr @gep_sdiv_mismatched_size(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv_mismatched_size(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv_mismatched_size( +; CHECK-NEXT: [[INDEX:%.*]] = udiv exact i64 [[OFF:%.*]], 20 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv exact i64 %off, 20 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) { ; CHECK-LABEL: @gep_sdiv_without_exact( ; CHECK-NEXT: [[INDEX:%.*]] = sdiv i64 [[OFF:%.*]], 7 @@ -1536,6 +1568,17 @@ define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_udiv_without_exact(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_udiv_without_exact( +; CHECK-NEXT: [[INDEX:%.*]] = udiv i64 [[OFF:%.*]], 7 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = udiv i64 %off, 7 + %ptr = getelementptr %struct.C, ptr %p, i64 %index + ret ptr %ptr +} + define ptr @gep_ashr_without_exact(ptr %p, i64 %off) { ; CHECK-LABEL: @gep_ashr_without_exact( ; CHECK-NEXT: [[INDEX:%.*]] = ashr i64 [[OFF:%.*]], 2 @@ -1547,6 +1590,17 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) { ret ptr %ptr } +define ptr @gep_lshr_without_exact(ptr %p, i64 %off) { +; CHECK-LABEL: @gep_lshr_without_exact( +; CHECK-NEXT: [[INDEX:%.*]] = lshr i64 [[OFF:%.*]], 2 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[PTR]] +; + %index = lshr i64 %off, 2 + %ptr = getelementptr i32, ptr %p, i64 %index + ret ptr %ptr +} + define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @test_only_used_by_icmp( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]] diff --git a/llvm/test/Transforms/InstCombine/pr80941.ll b/llvm/test/Transforms/InstCombine/pr80941.ll new file mode 100644 index 00000000000000..95242b1d1407bf --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr80941.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define float @pr80941(float %arg) { +; CHECK-LABEL: define float @pr80941( +; CHECK-SAME: float [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[ARG]], i32 144) +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FPEXT:%.*]] = fpext float [[ARG]] to double +; CHECK-NEXT: [[SIGN:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[FPEXT]]) +; CHECK-NEXT: [[FPTRUNC:%.*]] = fptrunc double [[SIGN]] to float +; CHECK-NEXT: br label [[IF_EXIT]] +; CHECK: if.exit: +; CHECK-NEXT: [[RET:%.*]] = phi float [ [[FPTRUNC]], [[IF_THEN]] ], [ [[ARG]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[RET]] +; +entry: + %cond = tail call i1 @llvm.is.fpclass.f32(float %arg, i32 144) + br i1 %cond, label %if.then, label %if.exit + +if.then: + %fpext = fpext float %arg to double + %sign = call double @llvm.copysign.f64(double 0.000000e+00, double %fpext) + %fptrunc = fptrunc double %sign to float + br label %if.exit + +if.exit: + %ret = phi float [ %fptrunc, %if.then ], [ %arg, %entry ] + ret float %ret +} diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll new file mode 100644 index 00000000000000..034af89112cb63 --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll @@ -0,0 +1,68 @@ +; REQUIRES: aarch64-registered-target + +; RUN: opt -passes=lowertypetests %s -o %t.o +; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-foobar +; CHECK-foobar: {{llvm.global.annotations = .*[foo|bar], .*[foo|bar],}} +; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-cfi +; CHECK-cfi-NOT: {{llvm.global.annotations = .*cfi.*}} + +target triple = "aarch64-none-linux-gnu" + +@.src = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 +@.str = private unnamed_addr constant [30 x i8] c"annotation_string_literal_bar\00", section "llvm.metadata" +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata" +@.str.2 = private unnamed_addr constant [30 x i8] c"annotation_string_literal_foo\00", section "llvm.metadata" +@llvm.global.annotations = appending global [2 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @bar, ptr @.str, ptr @.str.1, i32 2, ptr null }, { ptr, ptr, ptr, i32, ptr } { ptr @foo, ptr @.str.2, ptr @.str.1, i32 1, ptr null }], section "llvm.metadata" + +define i32 @bar(i32 noundef %0) #0 !type !8 !type !9 { + %2 = alloca i32, align 4 + store i32 %0, ptr %2, align 4 + %3 = load i32, ptr %2, align 4 + %4 = call i32 @foo(i32 noundef %3) + ret i32 %4 +} + +declare !type !8 !type !9 i32 @foo(i32 noundef) #1 + +define i32 @test(i32 noundef %0) #0 !type !8 !type !9 { + %2 = alloca i32, align 4 + %3 = alloca ptr, align 8 + store i32 %0, ptr %2, align 4 + %4 = load i32, ptr %2, align 4 + %5 = icmp sgt i32 %4, 0 + %6 = zext i1 %5 to i64 + %7 = select i1 %5, ptr @foo, ptr @bar + store ptr %7, ptr %3, align 8 + %8 = load ptr, ptr %3, align 8 + %9 = call i1 @llvm.type.test(ptr %8, metadata !"_ZTSFiiE"), !nosanitize !10 + br i1 %9, label %11, label %10, !nosanitize !10 + +10: + call void @llvm.ubsantrap(i8 2) #4, !nosanitize !10 + unreachable, !nosanitize !10 + +11: + %12 = load i32, ptr %2, align 4 + %13 = call i32 %8(i32 noundef %12) + ret i32 %13 +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.ubsantrap(i8 immarg) + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" } +attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" } +attributes #4 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 0} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{i32 1, !"ThinLTO", i32 0} +!6 = !{i32 1, !"EnableSplitLTOUnit", i32 1} +!8 = !{i64 0, !"_ZTSFiiE"} +!9 = !{i64 0, !"_ZTSFiiE.generalized"} +!10 = !{} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll new file mode 100644 index 00000000000000..ba406c8f20bb08 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 < %s | FileCheck %s + +define void @test(double %i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> , double [[I]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP4]], i32 7 +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer +; CHECK-NEXT: br label [[BB116:%.*]] +; CHECK: bb116: +; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[TMP15]], i32 1 +; CHECK-NEXT: [[I120:%.*]] = fadd double [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: [[I128:%.*]] = fadd double [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[I139:%.*]] = call double @llvm.maxnum.f64(double [[I128]], double 0.000000e+00) +; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP22]], <2 x double> zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = fmul <2 x double> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = fptosi <2 x double> [[TMP24]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = sub <2 x i32> zeroinitializer, [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt <2 x i32> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[I147:%.*]] = fcmp ogt double [[I120]], 0.000000e+00 +; CHECK-NEXT: ret void +; +bb: + %i74 = fsub double 0.000000e+00, poison + %i75 = fsub double 0.000000e+00, %i + %i76 = fmul double 0.000000e+00, %i75 + %i77 = fadd double %i76, 0.000000e+00 + %i78 = fadd double %i77, 0.000000e+00 + %i79 = fcmp ult double %i78, 0.000000e+00 + %i81 = fsub double %i, 0.000000e+00 + %i82 = fmul double 0.000000e+00, %i81 + %i83 = fadd double 0.000000e+00, %i82 + %i84 = fadd double %i83, 0.000000e+00 + %i85 = fcmp ult double %i84, 0.000000e+00 + %i86 = fsub double 0.000000e+00, %i + %i87 = fmul double 0.000000e+00, %i86 + %i88 = fadd double %i87, 0.000000e+00 + %i89 = fadd double %i88, 0.000000e+00 + %i90 = fcmp ult double %i89, 0.000000e+00 + %i91 = fsub double 0.000000e+00, 0.000000e+00 + %i92 = fmul double 0.000000e+00, 0.000000e+00 + %i93 = fadd double %i92, 0.000000e+00 + %i94 = fadd double %i93, 0.000000e+00 + %i95 = fcmp ult double %i94, 0.000000e+00 + %i96 = fsub double poison, 0.000000e+00 + %i97 = fadd double %i77, 0.000000e+00 + %i98 = fcmp ult double %i97, 0.000000e+00 + %i99 = fadd double %i83, 0.000000e+00 + %i100 = fcmp ult double %i99, 0.000000e+00 + %i101 = fmul double 0.000000e+00, 0.000000e+00 + %i102 = fadd double %i101, 0.000000e+00 + %i103 = fadd double %i102, 0.000000e+00 + %i104 = fcmp ult double %i103, 0.000000e+00 + %i105 = fmul double 0.000000e+00, 0.000000e+00 + %i106 = fadd double %i105, 0.000000e+00 + %i107 = fadd double %i106, 0.000000e+00 + %i108 = fcmp ult double %i107, 0.000000e+00 + br label %bb116 + +bb116: + %i117 = fmul double 0.000000e+00, %i81 + %i119 = fmul double 0.000000e+00, %i96 + %i120 = fadd double %i117, %i119 + %i121 = fmul double 0.000000e+00, %i74 + %i122 = fmul double 0.000000e+00, %i75 + %i123 = fadd double %i122, 0.000000e+00 + %i124 = fmul double 0.000000e+00, %i91 + %i125 = fadd double %i124, 0.000000e+00 + %i127 = fmul double 0.000000e+00, %i86 + %i128 = fadd double %i127, %i121 + %i133 = call double @llvm.maxnum.f64(double %i123, double 0.000000e+00) + %i134 = fmul double %i133, 0.000000e+00 + %i135 = fptosi double %i134 to i32 + %i136 = sub i32 0, %i135 + %i137 = icmp sgt i32 %i136, 0 + %i139 = call double @llvm.maxnum.f64(double %i128, double 0.000000e+00) + %i142 = call double @llvm.maxnum.f64(double %i125, double 0.000000e+00) + %i143 = fmul double %i142, 0.000000e+00 + %i144 = fptosi double %i143 to i32 + %i145 = sub i32 0, %i144 + %i146 = icmp sgt i32 %i145, 0 + %i147 = fcmp ogt double %i120, 0.000000e+00 + ret void +} + +declare double @llvm.maxnum.f64(double, double) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll new file mode 100644 index 00000000000000..f665dac3282b79 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 -S < %s | FileCheck %s + +define void @foo(double %i) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> , double [[I]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> [[TMP7]], double [[TMP2]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> , <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP5]], i32 6 +; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> [[TMP8]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = fcmp ult <8 x double> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = freeze <8 x i1> [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP16]]) +; CHECK-NEXT: br i1 [[TMP17]], label [[BB58:%.*]], label [[BB115:%.*]] +; CHECK: bb115: +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1 +; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP22]], <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = fmul <4 x double> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = fptosi <4 x double> [[TMP28]] to <4 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i32> zeroinitializer, [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP30]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP31]], 32000 +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP31]], i32 32000 +; CHECK-NEXT: [[I163:%.*]] = fcmp ogt double [[I118]], 0.000000e+00 +; CHECK-NEXT: [[I164:%.*]] = icmp slt i32 0, [[OP_RDX1]] +; CHECK-NEXT: unreachable +; CHECK: bb58: +; CHECK-NEXT: ret void +; +bb: + %i75 = fsub double 0.000000e+00, 0.000000e+00 + %i76 = fsub double 0.000000e+00, 0.000000e+00 + %i77 = fmul double 0.000000e+00, %i75 + %i78 = fmul double 0.000000e+00, %i76 + %i79 = fadd double %i78, 0.000000e+00 + %i80 = fadd double %i79, 0.000000e+00 + %i81 = fcmp ult double %i80, 0.000000e+00 + %i82 = fsub double 0.000000e+00, poison + %i83 = fmul double 0.000000e+00, %i82 + %i84 = fadd double 0.000000e+00, %i83 + %i85 = fadd double %i84, 0.000000e+00 + %i86 = fcmp ult double %i85, 0.000000e+00 + %i87 = fsub double 0.000000e+00, %i + %i88 = fadd double 0.000000e+00, %i77 + %i89 = fadd double %i88, 0.000000e+00 + %i90 = fcmp ult double %i89, 0.000000e+00 + %i91 = fsub double 0.000000e+00, 0.000000e+00 + %i92 = fmul double poison, 0.000000e+00 + %i93 = fadd double %i92, 0.000000e+00 + %i94 = fadd double %i93, 0.000000e+00 + %i95 = fcmp ult double %i94, 0.000000e+00 + %i96 = fadd double %i79, 0.000000e+00 + %i97 = fcmp ult double %i96, 0.000000e+00 + %i98 = fadd double %i84, 0.000000e+00 + %i99 = fcmp ult double %i98, 0.000000e+00 + %i100 = fadd double 0.000000e+00, %i77 + %i101 = fadd double %i100, 0.000000e+00 + %i102 = fcmp ult double %i101, 0.000000e+00 + %i103 = fsub double 0.000000e+00, %i + %i104 = fmul double poison, 0.000000e+00 + %i105 = fadd double %i104, 0.000000e+00 + %i106 = fadd double %i105, 0.000000e+00 + %i107 = fcmp ult double %i106, 0.000000e+00 + %i108 = select i1 %i107, i1 %i102, i1 false + %i109 = select i1 %i108, i1 %i99, i1 false + %i110 = select i1 %i109, i1 %i97, i1 false + %i111 = select i1 %i110, i1 %i95, i1 false + %i112 = select i1 %i111, i1 %i90, i1 false + %i113 = select i1 %i112, i1 %i86, i1 false + %i114 = select i1 %i113, i1 %i81, i1 false + br i1 %i114, label %bb58, label %bb115 + +bb115: + %i116 = fmul double 0.000000e+00, %i103 + %i117 = fmul double 0.000000e+00, %i82 + %i118 = fadd double %i116, %i117 + %i120 = fmul double 0.000000e+00, %i75 + %i121 = fmul double 0.000000e+00, %i76 + %i122 = fadd double %i121, 0.000000e+00 + %i123 = fadd double 0.000000e+00, %i120 + %i124 = fmul double 0.000000e+00, %i91 + %i125 = fadd double %i124, %i82 + %i126 = fadd double %i125, 0.000000e+00 + %i127 = fmul double 0.000000e+00, %i87 + %i128 = fadd double %i127, 0.000000e+00 + %i129 = fadd double %i128, 0.000000e+00 + %i130 = fadd double %i122, 0.000000e+00 + %i131 = fadd double %i123, 0.000000e+00 + %i132 = select i1 false, double 0.000000e+00, double %i131 + %i133 = fmul double %i132, 0.000000e+00 + %i134 = fmul double %i133, 0.000000e+00 + %i135 = fptosi double %i134 to i32 + %i136 = or i32 0, %i135 + %i137 = icmp slt i32 %i136, 32000 + %i138 = select i1 %i137, i32 %i136, i32 32000 + %i139 = select i1 false, double 0.000000e+00, double %i130 + %i140 = fmul double %i139, 0.000000e+00 + %i141 = fmul double %i140, 0.000000e+00 + %i142 = fptosi double %i141 to i32 + %i143 = or i32 0, %i142 + %i144 = icmp slt i32 %i143, %i138 + %i145 = select i1 %i144, i32 %i143, i32 %i138 + %i146 = select i1 false, double 0.000000e+00, double %i129 + %i147 = fmul double %i146, 0.000000e+00 + %i148 = fmul double %i147, 0.000000e+00 + %i149 = fptosi double %i148 to i32 + %i150 = or i32 0, %i149 + %i151 = icmp slt i32 %i150, %i145 + %i152 = select i1 %i151, i32 %i150, i32 %i145 + %i153 = select i1 false, double 0.000000e+00, double %i126 + %i154 = fmul double %i153, 0.000000e+00 + %i155 = fmul double %i154, 0.000000e+00 + %i156 = fptosi double %i155 to i32 + %i157 = or i32 0, %i156 + %i158 = icmp slt i32 %i157, %i152 + %i159 = select i1 %i158, i32 %i157, i32 %i152 + %i163 = fcmp ogt double %i118, 0.000000e+00 + %i164 = icmp slt i32 0, %i159 + unreachable + +bb58: + ret void +} diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index c6f9ee82e08cc1..74e7769a480598 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -414,10 +414,11 @@ def version_int(ver): config.available_features.add("llvm-dylib") config.substitutions.append( ( + # libLLVM.so.19.0git "%llvmdylib", - "{}/libLLVM-{}{}".format( - config.llvm_shlib_dir, config.llvm_dylib_version, config.llvm_shlib_ext - ), + "{}/libLLVM{}.{}".format( + config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version + ) ) ) diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 1138b2ccf7bce7..b6f255d472d16f 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -44,7 +44,7 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@ config.enable_threads = @LLVM_ENABLE_THREADS@ config.build_shared_libs = @BUILD_SHARED_LIBS@ config.link_llvm_dylib = @LLVM_LINK_LLVM_DYLIB@ -config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@@LLVM_VERSION_SUFFIX@" +config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@@LLVM_VERSION_SUFFIX@" config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' config.host_arch = "@HOST_ARCH@" config.have_opt_viewer_modules = @LLVM_HAVE_OPT_VIEWER_MODULES@ diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test index fc5856691f8dca..f88b7575002a94 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test +++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test @@ -48,6 +48,9 @@ # RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o # RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64 +# RUN: llvm-objcopy -I binary -O elf64-s390 %t.txt %t.s390x.o +# RUN: llvm-readobj --file-headers %t.s390x.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64 + # CHECK: Format: # 32-SAME: elf32- # 64-SAME: elf64- @@ -64,6 +67,7 @@ # PPCLE-SAME: powerpcle{{$}} # SPARC-SAME: sparc # SPARCEL-SAME: sparc +# S390X-SAME: s390 # X86-64-SAME: x86-64 # AARCH64-NEXT: Arch: aarch64 @@ -81,6 +85,7 @@ # RISCV64-NEXT: Arch: riscv64 # SPARC-NEXT: Arch: sparc{{$}} # SPARCEL-NEXT: Arch: sparcel +# S390X-NEXT: Arch: s390x # X86-64-NEXT: Arch: x86_64 # 32-NEXT: AddressSize: 32bit @@ -116,6 +121,7 @@ # RISCV64-NEXT: Machine: EM_RISCV (0xF3) # SPARC-NEXT: Machine: EM_SPARC (0x2) # SPARCEL-NEXT: Machine: EM_SPARC (0x2) +# S390X-NEXT: Machine: EM_S390 (0x16) # X86-64-NEXT: Machine: EM_X86_64 (0x3E) # CHECK-NEXT: Version: 1 diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test index 882940c05e19c2..9a8128611792d5 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test +++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test @@ -117,6 +117,10 @@ # RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV # RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV +# RUN: llvm-objcopy %t.o -O elf64-s390 %t.elf64_s390.o --split-dwo=%t.elf64_s390.dwo +# RUN: llvm-readobj --file-headers %t.elf64_s390.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV +# RUN: llvm-readobj --file-headers %t.elf64_s390.dwo | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV + !ELF FileHeader: Class: ELFCLASS32 @@ -160,6 +164,7 @@ Symbols: # RISCV32-SAME: riscv{{$}} # RISCV64-SAME: riscv{{$}} # SPARC-SAME: sparc +# S390X-SAME: s390 # X86-64-SAME: x86-64 # DEFAULT-SAME: unknown @@ -182,6 +187,7 @@ Symbols: # RISCV64-NEXT: Arch: riscv64 # SPARC-NEXT: Arch: sparc{{$}} # SPARCEL-NEXT: Arch: sparcel +# S390X-NEXT: Arch: s390x # X86-64-NEXT: Arch: x86_64 # DEFAULT-NEXT: Arch: unknown @@ -210,6 +216,7 @@ Symbols: # RISCV32: Machine: EM_RISCV (0xF3) # RISCV64: Machine: EM_RISCV (0xF3) # SPARC: Machine: EM_SPARC (0x2) +# S390X: Machine: EM_S390 (0x16) # X86-64: Machine: EM_X86_64 (0x3E) # 32: HeaderSize: 52 diff --git a/llvm/test/tools/llvm-objdump/openbsd-headers.test b/llvm/test/tools/llvm-objdump/openbsd-headers.test index f547854feeeedf..84fa59bdf89f5c 100644 --- a/llvm/test/tools/llvm-objdump/openbsd-headers.test +++ b/llvm/test/tools/llvm-objdump/openbsd-headers.test @@ -11,6 +11,8 @@ # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- # CHECK-NEXT: OPENBSD_NOBTCFI off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- +# CHECK-NEXT: OPENBSD_SYSCALLS off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 +# CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- # CHECK-NEXT: OPENBSD_BOOTDATA off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0 # CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags --- @@ -25,4 +27,5 @@ ProgramHeaders: - Type: 0x65a3dbe6 ## PT_OPENBSD_RANDOMIZE - Type: 0x65a3dbe7 ## PT_OPENBSD_WXNEEDED - Type: 0x65a3dbe8 ## PT_OPENBSD_NOBTCFI + - Type: 0x65a3dbe9 ## PT_OPENBSD_SYSCALLS - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA diff --git a/llvm/test/tools/llvm-readobj/ELF/program-headers.test b/llvm/test/tools/llvm-readobj/ELF/program-headers.test index 702a06b6403f0a..856cf378ddad95 100644 --- a/llvm/test/tools/llvm-readobj/ELF/program-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/program-headers.test @@ -29,68 +29,70 @@ # RUN: FileCheck %s --check-prefixes=ELF64,MAPPING --strict-whitespace --match-full-lines # RUN: llvm-readobj -l %t64.elf | FileCheck %s --check-prefixes=ELF-LLVM,ELF64-LLVM -# ELF32:There are 25 program headers, starting at offset 52 +# ELF32:There are 26 program headers, starting at offset 52 # ELF32-EMPTY: # ELF32-NEXT:Program Headers: # ELF32-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# ELF32-NEXT: PHDR 0x000354 0x00001000 0x00001000 0x00003 0x00003 W 0x1 -# ELF32-NEXT: PHDR 0x000357 0x00002000 0x00002000 0x00007 0x00007 E 0x1 -# ELF32-NEXT: NULL 0x000357 0x00002000 0x00002000 0x00007 0x00007 E 0x1 -# ELF32-NEXT: DYNAMIC 0x000354 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1 -# ELF32-NEXT: INTERP 0x00035e 0x00003000 0x00003000 0x00004 0x00004 RW 0x1 +# ELF32-NEXT: PHDR 0x000374 0x00001000 0x00001000 0x00003 0x00003 W 0x1 +# ELF32-NEXT: PHDR 0x000377 0x00002000 0x00002000 0x00007 0x00007 E 0x1 +# ELF32-NEXT: NULL 0x000377 0x00002000 0x00002000 0x00007 0x00007 E 0x1 +# ELF32-NEXT: DYNAMIC 0x000374 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1 +# ELF32-NEXT: INTERP 0x00037e 0x00003000 0x00003000 0x00004 0x00004 RW 0x1 # ELF32-NEXT: [Requesting program interpreter: ABC] -# ELF32-NEXT: NOTE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: SHLIB 0x000354 0x00001000 0x00001000 0x00001 0x00001 0x1 -# ELF32-NEXT: TLS 0x000362 0x00004000 0x00004000 0x00001 0x00001 0x1 -# ELF32-NEXT: : 0x60000000 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_EH_FRAME 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: SUNW_UNWIND 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_STACK 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_RELRO 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: GNU_PROPERTY 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_MUTABLE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_RANDOMIZE 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_WXNEEDED 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_NOBTCFI 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: OPENBSD_BOOTDATA 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x6fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000000 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000001 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000002 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x70000003 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 -# ELF32-NEXT: : 0x7fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: NOTE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: SHLIB 0x000374 0x00001000 0x00001000 0x00001 0x00001 0x1 +# ELF32-NEXT: TLS 0x000382 0x00004000 0x00004000 0x00001 0x00001 0x1 +# ELF32-NEXT: : 0x60000000 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_EH_FRAME 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: SUNW_UNWIND 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_STACK 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_RELRO 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: GNU_PROPERTY 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_MUTABLE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_RANDOMIZE 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_WXNEEDED 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_NOBTCFI 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_SYSCALLS 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: OPENBSD_BOOTDATA 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x6fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000000 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000001 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000002 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x70000003 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 +# ELF32-NEXT: : 0x7fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003 0x1 # ELF32-EMPTY: -# ELF64:There are 25 program headers, starting at offset 64 +# ELF64:There are 26 program headers, starting at offset 64 # ELF64-EMPTY: # ELF64-NEXT:Program Headers: # ELF64-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# ELF64-NEXT: PHDR 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 W 0x1 -# ELF64-NEXT: PHDR 0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 -# ELF64-NEXT: NULL 0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 -# ELF64-NEXT: DYNAMIC 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1 -# ELF64-NEXT: INTERP 0x0005c2 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW 0x1 +# ELF64-NEXT: PHDR 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 W 0x1 +# ELF64-NEXT: PHDR 0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 +# ELF64-NEXT: NULL 0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1 +# ELF64-NEXT: DYNAMIC 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1 +# ELF64-NEXT: INTERP 0x0005fa 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW 0x1 # ELF64-NEXT: [Requesting program interpreter: ABC] -# ELF64-NEXT: NOTE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: SHLIB 0x0005b8 0x0000000000001000 0x0000000000001000 0x000001 0x000001 0x1 -# ELF64-NEXT: TLS 0x0005c6 0x0000000000004000 0x0000000000004000 0x000001 0x000001 0x1 -# ELF64-NEXT: : 0x60000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_EH_FRAME 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: SUNW_UNWIND 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_STACK 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_RELRO 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: GNU_PROPERTY 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_MUTABLE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_RANDOMIZE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_WXNEEDED 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_NOBTCFI 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: OPENBSD_BOOTDATA 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x6fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000001 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x70000003 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ELF64-NEXT: : 0x7fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: NOTE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: SHLIB 0x0005f0 0x0000000000001000 0x0000000000001000 0x000001 0x000001 0x1 +# ELF64-NEXT: TLS 0x0005fe 0x0000000000004000 0x0000000000004000 0x000001 0x000001 0x1 +# ELF64-NEXT: : 0x60000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_EH_FRAME 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: SUNW_UNWIND 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_STACK 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_RELRO 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: GNU_PROPERTY 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_MUTABLE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_RANDOMIZE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_WXNEEDED 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_NOBTCFI 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_SYSCALLS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: OPENBSD_BOOTDATA 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x6fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000001 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x70000003 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ELF64-NEXT: : 0x7fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # ELF64-EMPTY: # MAPPING: Section to Segment mapping: @@ -120,13 +122,14 @@ # MAPPING-NEXT: 22 .foo.begin .foo.end {{$}} # MAPPING-NEXT: 23 .foo.begin .foo.end {{$}} # MAPPING-NEXT: 24 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 25 .foo.begin .foo.end {{$}} # MAPPING-NEXT: None .unused .strtab .shstrtab {{$}} # ELF-LLVM: ProgramHeaders [ # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_PHDR (0x6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -138,8 +141,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_PHDR (0x6) -# ELF32-LLVM-NEXT: Offset: 0x357 -# ELF64-LLVM-NEXT: Offset: 0x5BB +# ELF32-LLVM-NEXT: Offset: 0x377 +# ELF64-LLVM-NEXT: Offset: 0x5F3 # ELF-LLVM-NEXT: VirtualAddress: 0x2000 # ELF-LLVM-NEXT: PhysicalAddress: 0x2000 # ELF-LLVM-NEXT: FileSize: 7 @@ -151,8 +154,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_NULL (0x0) -# ELF32-LLVM-NEXT: Offset: 0x357 -# ELF64-LLVM-NEXT: Offset: 0x5BB +# ELF32-LLVM-NEXT: Offset: 0x377 +# ELF64-LLVM-NEXT: Offset: 0x5F3 # ELF-LLVM-NEXT: VirtualAddress: 0x2000 # ELF-LLVM-NEXT: PhysicalAddress: 0x2000 # ELF-LLVM-NEXT: FileSize: 7 @@ -164,8 +167,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_DYNAMIC (0x2) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -179,8 +182,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_INTERP (0x3) -# ELF32-LLVM-NEXT: Offset: 0x35E -# ELF64-LLVM-NEXT: Offset: 0x5C2 +# ELF32-LLVM-NEXT: Offset: 0x37E +# ELF64-LLVM-NEXT: Offset: 0x5FA # ELF-LLVM-NEXT: VirtualAddress: 0x3000 # ELF-LLVM-NEXT: PhysicalAddress: 0x3000 # ELF-LLVM-NEXT: FileSize: 4 @@ -193,8 +196,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_NOTE (0x4) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -205,8 +208,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_SHLIB (0x5) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 1 @@ -217,8 +220,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_TLS (0x7) -# ELF32-LLVM-NEXT: Offset: 0x362 -# ELF64-LLVM-NEXT: Offset: 0x5C6 +# ELF32-LLVM-NEXT: Offset: 0x382 +# ELF64-LLVM-NEXT: Offset: 0x5FE # ELF-LLVM-NEXT: VirtualAddress: 0x4000 # ELF-LLVM-NEXT: PhysicalAddress: 0x4000 # ELF-LLVM-NEXT: FileSize: 1 @@ -229,8 +232,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x60000000) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -241,8 +244,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_EH_FRAME (0x6474E550) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -253,8 +256,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_SUNW_UNWIND (0x6464E550) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -265,8 +268,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_STACK (0x6474E551) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -277,8 +280,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_RELRO (0x6474E552) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -289,8 +292,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_GNU_PROPERTY (0x6474E553) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -301,8 +304,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_MUTABLE (0x65A3DBE5) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -313,8 +316,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_RANDOMIZE (0x65A3DBE6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -325,8 +328,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_WXNEEDED (0x65A3DBE7) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -337,8 +340,20 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_NOBTCFI (0x65A3DBE8) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 +# ELF-LLVM-NEXT: VirtualAddress: 0x1000 +# ELF-LLVM-NEXT: PhysicalAddress: 0x1000 +# ELF-LLVM-NEXT: FileSize: 3 +# ELF-LLVM-NEXT: MemSize: 3 +# ELF-LLVM-NEXT: Flags [ (0x0) +# ELF-LLVM-NEXT: ] +# ELF-LLVM-NEXT: Alignment: 1 +# ELF-LLVM-NEXT: } +# ELF-LLVM-NEXT: ProgramHeader { +# ELF-LLVM-NEXT: Type: PT_OPENBSD_SYSCALLS (0x65A3DBE9) +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -349,8 +364,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: PT_OPENBSD_BOOTDATA (0x65A41BE6) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -361,8 +376,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x6FFFFFFF) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -373,8 +388,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000000) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -385,8 +400,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000001) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -397,8 +412,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000002) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -409,8 +424,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x70000003) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -421,8 +436,8 @@ # ELF-LLVM-NEXT: } # ELF-LLVM-NEXT: ProgramHeader { # ELF-LLVM-NEXT: Type: Unknown (0x7FFFFFFF) -# ELF32-LLVM-NEXT: Offset: 0x354 -# ELF64-LLVM-NEXT: Offset: 0x5B8 +# ELF32-LLVM-NEXT: Offset: 0x374 +# ELF64-LLVM-NEXT: Offset: 0x5F0 # ELF-LLVM-NEXT: VirtualAddress: 0x1000 # ELF-LLVM-NEXT: PhysicalAddress: 0x1000 # ELF-LLVM-NEXT: FileSize: 3 @@ -569,37 +584,42 @@ ProgramHeaders: VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 19: the PT_OPENBSD_BOOTDATA segment. +## Case 19: the PT_OPENBSD_SYSCALLS segment. + - Type: 0x65a3dbe9 ## PT_OPENBSD_SYSCALLS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 20: the PT_OPENBSD_BOOTDATA segment. - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 20: the PT_HIOS segment. +## Case 21: the PT_HIOS segment. - Type: 0x6fffffff ## PT_HIOS VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 21: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment. +## Case 22: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment. - Type: 0x70000000 ## PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 22: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment. +## Case 23: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment. - Type: 0x70000001 ## PT_ARM_EXIDX, PT_MIPS_RTPROC VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 23: the PT_MIPS_OPTIONS segment. +## Case 24: the PT_MIPS_OPTIONS segment. - Type: 0x70000002 ## PT_MIPS_OPTIONS VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 24: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment. +## Case 25: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment. - Type: 0x70000003 ## PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES VAddr: 0x1000 FirstSec: .foo.begin LastSec: .foo.end -## Case 25: the PT_HIPROC segment. +## Case 26: the PT_HIPROC segment. - Type: 0x7fffffff ## PT_HIPROC VAddr: 0x1000 FirstSec: .foo.begin @@ -610,9 +630,9 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-GNU # RUN: llvm-readobj --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-LLVM -# ARM-GNU: : 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ARM-GNU-NEXT: EXIDX 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# ARM-GNU-NEXT: : 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU: : 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU-NEXT: EXIDX 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# ARM-GNU-NEXT: : 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # ARM-LLVM: ProgramHeader { # ARM-LLVM: Type: Unknown (0x70000000) @@ -626,10 +646,10 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-GNU # RUN: llvm-readobj --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-LLVM -# MIPS-GNU: REGINFO 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: RTPROC 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: OPTIONS 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 -# MIPS-GNU-NEXT: ABIFLAGS 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU: REGINFO 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU-NEXT: RTPROC 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU-NEXT: OPTIONS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# MIPS-GNU-NEXT: ABIFLAGS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # MIPS-LLVM: ProgramHeader { # MIPS-LLVM: Type: PT_MIPS_REGINFO (0x70000000) @@ -645,7 +665,7 @@ ProgramHeaders: # RUN: llvm-readelf --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-GNU # RUN: llvm-readobj --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-LLVM -# RISCV-GNU: ATTRIBUTES 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 +# RISCV-GNU: ATTRIBUTES 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1 # RISCV-LLVM: ProgramHeader { # RISCV-LLVM: Type: PT_RISCV_ATTRIBUTES (0x70000003) diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index f15307181fad61..f63e5c61e802c8 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -299,6 +299,8 @@ static const StringMap TargetMap{ // LoongArch {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}}, {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}}, + // SystemZ + {"elf64-s390", {ELF::EM_S390, true, false}}, }; static Expected diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 34861ee92128fd..fda99bd6d33e17 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -291,6 +291,9 @@ template void ELFDumper::printProgramHeaders() { case ELF::PT_OPENBSD_RANDOMIZE: outs() << "OPENBSD_RANDOMIZE "; break; + case ELF::PT_OPENBSD_SYSCALLS: + outs() << "OPENBSD_SYSCALLS "; + break; case ELF::PT_OPENBSD_WXNEEDED: outs() << "OPENBSD_WXNEEDED "; break; diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index f369a63add1149..387124ad53e408 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1478,6 +1478,7 @@ static StringRef segmentTypeToString(unsigned Arch, unsigned Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_NOBTCFI); + LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_SYSCALLS); LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA); default: return ""; diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt index a47a0ec84c625c..09c15e304614c4 100644 --- a/llvm/tools/llvm-shlib/CMakeLists.txt +++ b/llvm/tools/llvm-shlib/CMakeLists.txt @@ -33,7 +33,10 @@ if(LLVM_BUILD_LLVM_DYLIB) if (LLVM_LINK_LLVM_DYLIB) set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN) endif() - add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB SONAME ${INSTALL_WITH_TOOLCHAIN} ${SOURCES}) + add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB OUTPUT_NAME LLVM ${INSTALL_WITH_TOOLCHAIN} ${SOURCES}) + # Add symlink for backwards compatibility with old library name + get_target_property(LLVM_DYLIB_FILENAME LLVM OUTPUT_NAME) + llvm_install_library_symlink(LLVM-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} ${LLVM_DYLIB_FILENAME} SHARED COMPONENT LLVM) list(REMOVE_DUPLICATES LIB_NAMES) if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") diff --git a/llvm/utils/release/github-upload-release.py b/llvm/utils/release/github-upload-release.py index a8bb569d2fc999..14ec05062d88c8 100755 --- a/llvm/utils/release/github-upload-release.py +++ b/llvm/utils/release/github-upload-release.py @@ -77,20 +77,28 @@ def upload_files(repo, release, files): parser.add_argument("--token", type=str) parser.add_argument("--release", type=str) parser.add_argument("--user", type=str) +parser.add_argument("--user-token", type=str) # Upload args parser.add_argument("--files", nargs="+", type=str) args = parser.parse_args() -github = github.Github(args.token) -llvm_org = github.get_organization("llvm") +gh = github.Github(args.token) +llvm_org = gh.get_organization("llvm") llvm_repo = llvm_org.get_repo("llvm-project") if args.user: + if not args.user_token: + print("--user-token option required when --user is used") + sys.exit(1) # Validate that this user is allowed to modify releases. - user = github.get_user(args.user) - team = llvm_org.get_team_by_slug("llvm-release-managers") + user = gh.get_user(args.user) + team = ( + github.Github(args.user_token) + .get_organization("llvm") + .get_team_by_slug("llvm-release-managers") + ) if not team.has_in_members(user): print("User {} is not a allowed to modify releases".format(args.user)) sys.exit(1) diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh index 5b1945df47d24a..0af16387ce1d8e 100755 --- a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -537,6 +537,11 @@ function build_llvmCore() { InstallTarget="$InstallTarget install-runtimes" fi fi + if [ "$Phase" -eq "3" ]; then + # Build everything at once, with the proper parallelism and verbosity, + # in Phase 3. + BuildTarget= + fi cd $ObjDir echo "# Compiling llvm $Release-$RC $Flavor" diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir new file mode 100644 index 00000000000000..f8d082082117cb --- /dev/null +++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir @@ -0,0 +1,27 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// Decoding the attribute does not work on big-endian platforms currently +// XFAIL: target=s390x-{{.*}} + +// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000] +llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource : tensor<5xf32>) : !llvm.array<5 x f32> + +// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> +llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource : vector<5xf32>) : vector<5xf32> + + +// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]] +llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>> + +// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ]] +llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>> + +// Resources are kept at end of file. New tests should be added above this. +{-# + dialect_resources: { + builtin: { + dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E", + dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E" + } + } +#-} \ No newline at end of file diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 448aa3a5d85d79..961c9484446845 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -101,19 +101,6 @@ llvm.mlir.global internal @dense_float_vector_3d(dense<[[[1.0, 2.0], [3.0, 4.0]] // CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] llvm.mlir.global internal @splat_float_vector_3d(dense<42.0> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> -// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000] -llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource : tensor<5xf32>) : !llvm.array<5 x f32> - -// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> -llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource : vector<5xf32>) : vector<5xf32> - - -// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]] -llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>> - -// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ]] -llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>> - // // Linkage attribute. // @@ -1590,16 +1577,6 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali llvm.invoke %9(%6, %0) to ^bb2 unwind ^bb1 vararg(!llvm.func) : !llvm.ptr, (!llvm.ptr, i32) -> () } -// Resources are kept at end of file. New tests should be added above this. -{-# - dialect_resources: { - builtin: { - dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E", - dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E" - } - } -#-} - // ----- llvm.func @foo() -> i8 diff --git a/openmp/cmake/HandleOpenMPOptions.cmake b/openmp/cmake/HandleOpenMPOptions.cmake index 71346201129b68..9387d9b3b0ff75 100644 --- a/openmp/cmake/HandleOpenMPOptions.cmake +++ b/openmp/cmake/HandleOpenMPOptions.cmake @@ -46,7 +46,11 @@ append_if(OPENMP_HAVE_WEXTRA_FLAG "-Wno-extra" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_WPEDANTIC_FLAG "-Wno-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_WMAYBE_UNINITIALIZED_FLAG "-Wno-maybe-uninitialized" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) -append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +if (NOT (WIN32 OR CYGWIN)) + # This flag is not relevant on Windows; the flag is accepted, but produces warnings + # about argument unused during compilation. + append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() append_if(OPENMP_HAVE_FUNCTION_SECTIONS "-ffunction-section" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append_if(OPENMP_HAVE_DATA_SECTIONS "-fdata-sections" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index c287a31e0b1b54..259c57b5afbca5 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1181,7 +1181,11 @@ extern void __kmp_init_target_task(); #define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) #endif +#if KMP_OS_AIX && KMP_ARCH_PPC +#define KMP_MAX_STKSIZE 0x10000000 /* 256Mb max size on 32-bit AIX */ +#else #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) +#endif #if KMP_ARCH_X86 #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) @@ -2494,7 +2498,8 @@ typedef struct kmp_dephash_entry kmp_dephash_entry_t; #define KMP_DEP_MTX 0x4 #define KMP_DEP_SET 0x8 #define KMP_DEP_ALL 0x80 -// Compiler sends us this info: +// Compiler sends us this info. Note: some test cases contain an explicit copy +// of this struct and should be in sync with any changes here. typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 9eeaeb88fb9ec7..878e78b5c7ad2d 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1533,8 +1533,9 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); if (*lk == 0) { if (KMP_IS_D_LOCK(lockseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, - KMP_GET_D_TAG(lockseq)); + KMP_COMPARE_AND_STORE_ACQ32( + (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0, + KMP_GET_D_TAG(lockseq)); } else { __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); } diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 88189659a23416..4dc8a90f83b4ea 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -144,7 +144,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { // Mutual exclusion -// The symbol that icc/ifort generates for unnamed for unnamed critical sections +// The symbol that icc/ifort generates for unnamed critical sections // - .gomp_critical_user_ - is defined using .comm in any objects reference it. // We can't reference it directly here in C code, as the symbol contains a ".". // diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 85c54f4cdc7e96..0ad14f862bcb9b 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -2689,7 +2689,7 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) { // lock word. static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { - TCW_4(*lck, KMP_GET_D_TAG(seq)); + TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq)); KA_TRACE( 20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); @@ -3180,8 +3180,8 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, lck->type = tag; if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx - << 1; // indirect lock word must be even + *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) = + idx << 1; // indirect lock word must be even } else { *((kmp_indirect_lock_t **)user_lock) = lck; } diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index f21179b4eb68a1..e2a0cda01a9718 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -50,7 +50,7 @@ typedef struct ident ident_t; // recent versions), but we are bounded by the pointer-sized chunks that // the Intel compiler allocates. -#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) +#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT) #define OMP_LOCK_T_SIZE sizeof(int) #define OMP_NEST_LOCK_T_SIZE sizeof(void *) #else @@ -120,8 +120,15 @@ extern void __kmp_validate_locks(void); struct kmp_base_tas_lock { // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__ + // Flip the ordering of the high and low 32-bit member to be consistent + // with the memory layout of the address in 64-bit big-endian. + kmp_int32 depth_locked; // depth locked, for nested locks only + std::atomic poll; +#else std::atomic poll; kmp_int32 depth_locked; // depth locked, for nested locks only +#endif }; typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; @@ -1138,11 +1145,13 @@ extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32); // Extracts direct lock tag from a user lock pointer #define KMP_EXTRACT_D_TAG(l) \ - (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \ - -(*((kmp_dyna_lock_t *)(l)) & 1)) + ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll & \ + ((1 << KMP_LOCK_SHIFT) - 1) & \ + -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1)) // Extracts indirect lock index from a user lock pointer -#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1) +#define KMP_EXTRACT_I_INDEX(l) \ + ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1) // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t // *) and op (operation type). diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index d2157b10b7819a..ec86ee07472c1e 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -255,8 +255,13 @@ static void __kmp_stg_parse_bool(char const *name, char const *value, // placed here in order to use __kmp_round4k static function void __kmp_check_stksize(size_t *val) { // if system stack size is too big then limit the size for worker threads +#if KMP_OS_AIX + if (*val > KMP_DEFAULT_STKSIZE * 2) // Use 2 times, 16 is too large for AIX. + *val = KMP_DEFAULT_STKSIZE * 2; +#else if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics... *val = KMP_DEFAULT_STKSIZE * 16; +#endif if (*val < __kmp_sys_min_stksize) *val = __kmp_sys_min_stksize; if (*val > KMP_MAX_STKSIZE) diff --git a/openmp/runtime/src/z_AIX_asm.S b/openmp/runtime/src/z_AIX_asm.S new file mode 100644 index 00000000000000..d711fcb7a7854f --- /dev/null +++ b/openmp/runtime/src/z_AIX_asm.S @@ -0,0 +1,410 @@ +// z_AIX_asm.S: - microtasking routines specifically +// written for Power platforms running AIX OS + +// +////===----------------------------------------------------------------------===// +//// +//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +//// See https://llvm.org/LICENSE.txt for license information. +//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//// +////===----------------------------------------------------------------------===// +// + +// ----------------------------------------------------------------------- +// macros +// ----------------------------------------------------------------------- + +#include "kmp_config.h" + +#if KMP_OS_AIX +//------------------------------------------------------------------------ +// int +// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, p_argv[0], ... ); +// +// // FIXME: This is done at call-site and can be removed here. +// #if OMPT_SUPPORT +// *exit_frame_ptr = 0; +// #endif +// +// return 1; +// } +// +// parameters: +// r3: pkfn +// r4: gtid +// r5: tid +// r6: argc +// r7: p_argv +// r8: &exit_frame +// +// return: r3 (always 1/TRUE) +// + +#if KMP_ARCH_PPC64_XCOFF + + .globl __kmp_invoke_microtask[DS] + .globl .__kmp_invoke_microtask + .align 4 + .csect __kmp_invoke_microtask[DS],3 + .vbyte 8, .__kmp_invoke_microtask + .vbyte 8, TOC[TC0] + .vbyte 8, 0 + .csect .text[PR],2 + .machine "pwr7" +.__kmp_invoke_microtask: + + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 48 +// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for +// the parameters to the microtask (gtid, tid, argc elements of p_argv), +// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31. +// With OMP-T support, we need an additional 8 bytes to save r30 to hold +// a copy of r8. +// Stack offsets relative to stack pointer: +// r31: -8, r30: -16, gtid: -20, tid: -24 + + mflr 0 + std 31, -8(1) # Save r31 to the stack + std 0, 16(1) # Save LR to the linkage area + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + +// Compute the size of the "argc" portion of the parameter save area. +// The parameter save area is always at least 64 bytes long (i.e. 8 regs) +// The microtask has (2 + argc) parameters, so if argc <= 6, we need to +// to allocate 8*6 bytes, not 8*argc. + li 0, 6 + cmpwi 0, 6, 6 + iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 + sldi 0, 0, 3 # r0 = 8 * max(argc, 6) + +// Compute the size necessary for the local stack frame. +// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) + +// 8 (parameter gtid) + 8 (parameter tid) + li 12, 88 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes). + li 0, -16 + and 12, 0, 12 + +// Establish the local stack frame. + stdux 1, 1, 12 + +#if OMPT_SUPPORT + std 30, -16(31) # Save r30 to the stack + std 1, 0(8) + mr 30, 8 +#endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. + stw 4, -20(31) # Save gtid to the stack + stw 5, -24(31) # Save tid to the stack + + mr 12, 6 # r12 = argc + mr 4, 7 # r4 = p_argv + + cmpwi 0, 12, 1 + blt 0, .Lcall # if (argc < 1) goto .Lcall + + ld 5, 0(4) # r5 = p_argv[0] + + cmpwi 0, 12, 2 + blt 0, .Lcall # if (argc < 2) goto .Lcall + + ld 6, 8(4) # r6 = p_argv[1] + + cmpwi 0, 12, 3 + blt 0, .Lcall # if (argc < 3) goto .Lcall + + ld 7, 16(4) # r7 = p_argv[2] + + cmpwi 0, 12, 4 + blt 0, .Lcall # if (argc < 4) goto .Lcall + + ld 8, 24(4) # r8 = p_argv[3] + + cmpwi 0, 12, 5 + blt 0, .Lcall # if (argc < 5) goto .Lcall + + ld 9, 32(4) # r9 = p_argv[4] + + cmpwi 0, 12, 6 + blt 0, .Lcall # if (argc < 6) goto .Lcall + + ld 10, 40(4) # r10 = p_argv[5] + + cmpwi 0, 12, 7 + blt 0, .Lcall # if (argc < 7) goto .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 # argc -= 6 + mtctr 12 + +// These are set to 8 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). The parameter save area +// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64. + addi 4, 4, 40 # p_argv = p_argv + 5 + # (i.e. skip the 5 elements we already processed) + addi 12, 1, 104 # r12 = stack offset (112 - 8) + +.Lnext: + ldu 0, 8(4) + stdu 0, 8(12) + bdnz .Lnext + +.Lcall: + std 2, 40(1) # Save the TOC pointer to the linkage area +// Load the actual function address from the function descriptor. + ld 12, 0(3) # Function address + ld 2, 8(3) # TOC pointer + ld 11, 16(3) # Environment pointer + + addi 3, 31, -20 # r3 = >id + addi 4, 31, -24 # r4 = &tid + + mtctr 12 # CTR = function address + bctrl # Branch to CTR + ld 2, 40(1) # Restore TOC pointer from linkage area + +#if OMPT_SUPPORT + li 3, 0 + std 3, 0(30) +#endif + + li 3, 1 + +#if OMPT_SUPPORT + ld 30, -16(31) # Restore r30 from the saved value on the stack +#endif + + mr 1, 31 + ld 31, -8(1) # Restore r31 from the saved value on the stack + ld 0, 16(1) + mtlr 0 # Restore LR from the linkage area + blr # Branch to LR + +#else // KMP_ARCH_PPC_XCOFF + + .globl __kmp_invoke_microtask[DS] + .globl .__kmp_invoke_microtask + .align 4 + .csect __kmp_invoke_microtask[DS],2 + .vbyte 4, .__kmp_invoke_microtask + .vbyte 4, TOC[TC0] + .vbyte 4, 0 + .csect .text[PR],2 + .machine "pwr7" +.__kmp_invoke_microtask: + + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 24 +// bytes under the XCOFF ABI, plus max(32, 8*(2 + argc)) for +// the parameters to the microtask (gtid, tid, argc elements of p_argv), +// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31. +// With OMP-T support, we need an additional 4 bytes to save r30 to hold +// a copy of r8. +// Stack offsets relative to stack pointer: +// r31: -4, r30: -8, gtid: -12, tid: -16 + + mflr 0 + stw 31, -4(1) # Save r31 to the stack + stw 0, 8(1) # Save LR to the linkage area + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + +// Compute the size of the "argc" portion of the parameter save area. +// The parameter save area is always at least 32 bytes long (i.e. 8 regs) +// The microtask has (2 + argc) parameters, so if argc <= 6, we need to +// to allocate 4*6 bytes, not 4*argc. + li 0, 6 + cmpwi 0, 6, 6 + iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 + slwi 0, 0, 2 # r0 = 4 * max(argc, 6) + +// Compute the size necessary for the local stack frame. +// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) + +// 4 (parameter gtid) + 4 (parameter tid) + li 12, 56 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes). + li 0, -16 + and 12, 0, 12 + +// Establish the local stack frame. + stwux 1, 1, 12 + +#if OMPT_SUPPORT + stw 30, -8(31) # Save r30 to the stack + stw 1, 0(8) + mr 30, 8 +#endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. + stw 4, -12(31) # Save gtid to the stack + stw 5, -16(31) # Save tid to the stack + + mr 12, 6 # r12 = argc + mr 4, 7 # r4 = p_argv + + cmpwi 0, 12, 1 + blt 0, .Lcall # if (argc < 1) goto .Lcall + + lwz 5, 0(4) # r5 = p_argv[0] + + cmpwi 0, 12, 2 + blt 0, .Lcall # if (argc < 2) goto .Lcall + + lwz 6, 4(4) # r6 = p_argv[1] + + cmpwi 0, 12, 3 + blt 0, .Lcall # if (argc < 3) goto .Lcall + + lwz 7, 8(4) # r7 = p_argv[2] + + cmpwi 0, 12, 4 + blt 0, .Lcall # if (argc < 4) goto .Lcall + + lwz 8, 12(4) # r8 = p_argv[3] + + cmpwi 0, 12, 5 + blt 0, .Lcall # if (argc < 5) goto .Lcall + + lwz 9, 16(4) # r9 = p_argv[4] + + cmpwi 0, 12, 6 + blt 0, .Lcall # if (argc < 6) goto .Lcall + + lwz 10, 20(4) # r10 = p_argv[5] + + cmpwi 0, 12, 7 + blt 0, .Lcall # if (argc < 7) goto .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 # argc -= 6 + mtctr 12 + +// These are set to 4 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). The parameter save area +// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF. + addi 4, 4, 20 # p_argv = p_argv + 5 + # (i.e. skip the 5 elements we already processed) + addi 12, 1, 52 # r12 = stack offset (56 - 4) + +.Lnext: + lwzu 0, 4(4) + stwu 0, 4(12) + bdnz .Lnext + +.Lcall: + stw 2, 20(1) # Save the TOC pointer to the linkage area +// Load the actual function address from the function descriptor. + lwz 12, 0(3) # Function address + lwz 2, 4(3) # TOC pointer + lwz 11, 8(3) # Environment pointer + + addi 3, 31, -12 # r3 = >id + addi 4, 31, -16 # r4 = &tid + + mtctr 12 # CTR = function address + bctrl # Branch to CTR + lwz 2, 20(1) # Restore TOC pointer from linkage area + +#if OMPT_SUPPORT + li 3, 0 + stw 3, 0(30) +#endif + + li 3, 1 + +#if OMPT_SUPPORT + lwz 30, -8(31) # Restore r30 from the saved value on the stack +#endif + + mr 1, 31 + lwz 31, -4(1) # Restore r31 from the saved value on the stack + lwz 0, 8(1) + mtlr 0 # Restore LR from the linkage area + blr # Branch to LR + +#endif // KMP_ARCH_PPC64_XCOFF + +.Lfunc_end0: + .vbyte 4, 0x00000000 # Traceback table begin + .byte 0x00 # Version = 0 + .byte 0x09 # Language = CPlusPlus + .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue + # +HasTraceBackTableOffset, -IsInternalProcedure + # -HasControlledStorage, -IsTOCless + # -IsFloatingPointPresent + # -IsFloatingPointOperationLogOrAbortEnabled + .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed + # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved + .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 +#if OMPT_SUPPORT + .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2 + .byte 0x06 # NumberOfFixedParms = 6 +#else + .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 + .byte 0x05 # NumberOfFixedParms = 5 +#endif + .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack + .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i + .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size + .vbyte 2, 0x0016 # Function name len = 22 + .byte "__kmp_invoke_microtask" # Function Name + .byte 0x1f # AllocaRegister = 31 + # -- End function + +// -- End __kmp_invoke_microtask + +// Support for unnamed common blocks. + + .comm .gomp_critical_user_, 32, 3 +#if KMP_ARCH_PPC64_XCOFF + .csect __kmp_unnamed_critical_addr[RW],3 +#else + .csect __kmp_unnamed_critical_addr[RW],2 +#endif + .globl __kmp_unnamed_critical_addr[RW] + .ptr .gomp_critical_user_ + +// -- End unnamed common block + + .toc + +#endif // KMP_OS_AIX diff --git a/openmp/runtime/test/tasking/bug_nested_proxy_task.c b/openmp/runtime/test/tasking/bug_nested_proxy_task.c index 43502bdcd1abd1..24fe1f3fe7607c 100644 --- a/openmp/runtime/test/tasking/bug_nested_proxy_task.c +++ b/openmp/runtime/test/tasking/bug_nested_proxy_task.c @@ -50,12 +50,21 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits - unsigned in : 1; - unsigned out : 1; - unsigned mtx : 1; - unsigned set : 1; - unsigned unused : 3; - unsigned all : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t; diff --git a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c index ff75df51aff077..688860c035728f 100644 --- a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c +++ b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c @@ -47,12 +47,21 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits - unsigned in : 1; - unsigned out : 1; - unsigned mtx : 1; - unsigned set : 1; - unsigned unused : 3; - unsigned all : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t; diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h index 402ecf3ed553c9..ba57656cbac41d 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/common.h +++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h @@ -17,9 +17,21 @@ typedef struct kmp_depend_info { union { unsigned char flag; struct { - bool in : 1; - bool out : 1; - bool mtx : 1; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + unsigned all : 1; + unsigned unused : 3; + unsigned set : 1; + unsigned mtx : 1; + unsigned out : 1; + unsigned in : 1; +#else + unsigned in : 1; + unsigned out : 1; + unsigned mtx : 1; + unsigned set : 1; + unsigned unused : 3; + unsigned all : 1; +#endif } flags; }; } kmp_depend_info_t;