diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml
index 68b4a68d1af984..43b90193406fc9 100644
--- a/.github/workflows/llvm-project-tests.yml
+++ b/.github/workflows/llvm-project-tests.yml
@@ -58,6 +58,10 @@ jobs:
   lit-tests:
     name: Lit Tests
     runs-on: ${{ matrix.os }}
+    container:
+      image: ${{(startsWith(matrix.os, 'ubuntu') && 'ghcr.io/llvm/ci-ubuntu-22.04:latest') || null}}
+      volumes:
+        - /mnt/:/mnt/
     strategy:
       fail-fast: false
       matrix:
@@ -77,6 +81,7 @@ jobs:
         with:
           python-version: ${{ inputs.python_version }}
       - name: Install Ninja
+        if: runner.os != 'Linux'
         uses: llvm/actions/install-ninja@main
       # actions/checkout deletes any existing files in the new git directory,
       # so this needs to either run before ccache-action or it has to use
@@ -108,8 +113,8 @@ jobs:
         run: |
           if [ "${{ runner.os }}" == "Linux" ]; then
             builddir="/mnt/build/"
-            sudo mkdir -p $builddir
-            sudo chown `whoami`:`whoami` $builddir
+            mkdir -p $builddir
+            extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang"
           else
             builddir="$(pwd)"/build
           fi
@@ -123,6 +128,7 @@ jobs:
                 -DLLDB_INCLUDE_TESTS=OFF \
                 -DCMAKE_C_COMPILER_LAUNCHER=sccache \
                 -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
+                $extra_cmake_args \
                 ${{ inputs.extra_cmake_args }}
           ninja -C "$builddir" '${{ inputs.build_target }}'
 
diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index f2a831ad3577ad..53da8662b0203a 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -28,6 +28,7 @@ jobs:
     name: Create a New Release
     runs-on: ubuntu-latest
     needs: validate-tag
+
     steps:
       - name: Install Dependencies
         run: |
@@ -40,8 +41,9 @@ jobs:
       - name: Create Release
         env:
           GITHUB_TOKEN: ${{ github.token }}
+          USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
         run: |
-          ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} create
+          ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} --user-token "$USER_TOKEN" create
   release-documentation:
     name: Build and Upload Release Documentation
     needs:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 95d44951ae7ee6..93a67e7a895592 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -171,6 +171,22 @@ AST Dumping Potentially Breaking Changes
       "qualType": "foo"
     }
 
+Clang Frontend Potentially Breaking Changes
+-------------------------------------------
+- Target OS macros extension
+  A new Clang extension (see :ref:`here <target_os_detail>`) is enabled for
+  Darwin (Apple platform) targets. Clang now defines ``TARGET_OS_*`` macros for
+  these targets, which could break existing code bases with improper checks for
+  the ``TARGET_OS_`` macros. For example, existing checks might fail to include
+  the ``TargetConditionals.h`` header from Apple SDKs and therefore leaving the
+  macros undefined and guarded code unexercised.
+
+  Affected code should be checked to see if it's still intended for the specific
+  target and fixed accordingly.
+
+  The extension can be turned off by the option ``-fno-define-target-os-macros``
+  as a workaround.
+
 What's New in Clang |release|?
 ==============================
 Some of the major new features and improvements to Clang are listed
@@ -304,6 +320,10 @@ Non-comprehensive list of changes in this release
 
 * The version of Unicode used by Clang (primarily to parse identifiers) has been updated to 15.1.
 
+* Clang now defines macro ``__LLVM_INSTR_PROFILE_GENERATE`` when compiling with
+  PGO instrumentation profile generation, and ``__LLVM_INSTR_PROFILE_USE`` when
+  compiling with PGO profile use.
+
 New Compiler Flags
 ------------------
 
@@ -344,6 +364,17 @@ New Compiler Flags
   attribute the replaceable global new and delete operators behave normally
   (like other functions) with respect to visibility attributes, pragmas and
   options (e.g ``--fvisibility=``).
+* Full register names can be used when printing assembly via ``-mregnames``.
+  This option now matches the one used by GCC.
+
+.. _target_os_detail:
+
+* ``-fdefine-target-os-macros`` and its complement
+  ``-fno-define-target-os-macros``. Enables or disables the Clang extension to
+  provide built-in definitions of a list of ``TARGET_OS_*`` macros based on the
+  target triple.
+
+  The extension is enabled by default for Darwin (Apple platform) targets.
 
 Deprecated Compiler Flags
 -------------------------
@@ -363,6 +394,7 @@ Modified Compiler Flags
 * ``-fvisibility-global-new-delete-hidden`` is now a deprecated spelling of
   ``-fvisibility-global-new-delete=force-hidden`` (``-fvisibility-global-new-delete=``
   is new in this release).
+* ``-fprofile-update`` is enabled for ``-fprofile-generate``.
 
 Removed Compiler Flags
 -------------------------
@@ -860,6 +892,9 @@ Bug Fixes in This Version
   Fixes (`#78290 <https://github.com/llvm/llvm-project/issues/78290>`_)
 - Fixed assertion failure with deleted overloaded unary operators.
   Fixes (`#78314 <https://github.com/llvm/llvm-project/issues/78314>`_)
+- The XCOFF object file format does not support aliases to symbols having common
+  linkage. Clang now diagnoses the use of an alias for a common symbol when
+  compiling for AIX.
 
 - Clang now doesn't produce false-positive warning `-Wconstant-logical-operand`
   for logical operators in C23.
@@ -1261,6 +1296,16 @@ CUDA Support
 - Clang now supports CUDA SDK up to 12.3
 - Added support for sm_90a
 
+PowerPC Support
+^^^^^^^^^^^^^^^
+
+- Added ``nmmintrin.h`` to intrinsics headers.
+- Added ``__builtin_ppc_fence`` as barrier of code motion, and
+  ``__builtin_ppc_mffsl`` for corresponding instruction.
+- Supported ``__attribute__((target("tune=cpu")))``.
+- Emit ``float-abi`` module flag on 64-bit ELFv2 PowerPC targets if
+  ``long double`` type is used in current module.
+
 AIX Support
 ^^^^^^^^^^^
 
@@ -1269,6 +1314,10 @@ AIX Support
   base is encoded as an immediate operand.
   This access sequence is not used for TLS variables larger than 32KB, and is
   currently only supported on 64-bit mode.
+- Inline assembler supports VSR register in pure digits.
+- Enabled ThinLTO support. Requires AIX 7.2 TL5 SP7 or newer, or AIX 7.3 TL2
+  or newer. Similar to the LTO support on AIX, ThinLTO is implemented with
+  the libLTO.so plugin.
 
 WebAssembly Support
 ^^^^^^^^^^^^^^^^^^^
@@ -1332,6 +1381,8 @@ libclang
 - Exposed arguments of ``clang::annotate``.
 - ``clang::getCursorKindForDecl`` now recognizes linkage specifications such as
   ``extern "C"`` and reports them as ``CXCursor_LinkageSpec``.
+- Changed the libclang library on AIX to export only the necessary symbols to
+  prevent issues of resolving to the wrong duplicate symbol.
 
 Static Analyzer
 ---------------
@@ -1343,9 +1394,6 @@ New features
   of static analysis tools, such as the Clang Static Analyzer.
   `Documentation <https://clang.llvm.org/docs/AttributeReference.html#suppress>`__.
 
-- Added support for the ``cleanup`` attribute.
-  `Documentation <https://clang.llvm.org/docs/AttributeReference.html#cleanup>`__.
-
 - Support "Deducing this" (P0847R7). (Worked out of the box)
   (`af4751738db8 <https://github.com/llvm/llvm-project/commit/af4751738db89a142a8880c782d12d4201b222a8>`__)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e8d03fc2690235..175bedbfb4d01c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5815,6 +5815,18 @@ def mvis3 : Flag<["-"], "mvis3">, Group<m_sparc_Features_Group>;
 def mno_vis3 : Flag<["-"], "mno-vis3">, Group<m_sparc_Features_Group>;
 def mhard_quad_float : Flag<["-"], "mhard-quad-float">, Group<m_sparc_Features_Group>;
 def msoft_quad_float : Flag<["-"], "msoft-quad-float">, Group<m_sparc_Features_Group>;
+foreach i = 1 ... 7 in
+  def ffixed_g#i : Flag<["-"], "ffixed-g"#i>, Group<m_sparc_Features_Group>,
+    HelpText<"Reserve the G"#i#" register (SPARC only)">;
+foreach i = 0 ... 5 in
+  def ffixed_o#i : Flag<["-"], "ffixed-o"#i>, Group<m_sparc_Features_Group>,
+    HelpText<"Reserve the O"#i#" register (SPARC only)">;
+foreach i = 0 ... 7 in
+  def ffixed_l#i : Flag<["-"], "ffixed-l"#i>, Group<m_sparc_Features_Group>,
+    HelpText<"Reserve the L"#i#" register (SPARC only)">;
+foreach i = 0 ... 5 in
+  def ffixed_i#i : Flag<["-"], "ffixed-i"#i>, Group<m_sparc_Features_Group>,
+    HelpText<"Reserve the I"#i#" register (SPARC only)">;
 } // let Flags = [TargetSpecific]
 
 // M68k features flags
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 336b7a5e3d727d..3036f461c1ded1 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1187,6 +1187,8 @@ TargetInfo::BuiltinVaListKind AArch64TargetInfo::getBuiltinVaListKind() const {
 }
 
 const char *const AArch64TargetInfo::GCCRegNames[] = {
+    // clang-format off
+
     // 32-bit Integer registers
     "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11",
     "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19", "w20", "w21", "w22",
@@ -1223,7 +1225,12 @@ const char *const AArch64TargetInfo::GCCRegNames[] = {
 
     // SVE predicate-as-counter registers
     "pn0",  "pn1",  "pn2",  "pn3",  "pn4",  "pn5",  "pn6",  "pn7",  "pn8",
-    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15"
+    "pn9",  "pn10", "pn11", "pn12", "pn13", "pn14", "pn15",
+
+    // SME registers
+    "za", "zt0",
+
+    // clang-format on
 };
 
 ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
index 22e583021515e5..ae1a4ba7882627 100644
--- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
@@ -178,4 +178,85 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args,
     else
       Features.push_back("-hard-quad-float");
   }
+
+  if (Args.hasArg(options::OPT_ffixed_g1))
+    Features.push_back("+reserve-g1");
+
+  if (Args.hasArg(options::OPT_ffixed_g2))
+    Features.push_back("+reserve-g2");
+
+  if (Args.hasArg(options::OPT_ffixed_g3))
+    Features.push_back("+reserve-g3");
+
+  if (Args.hasArg(options::OPT_ffixed_g4))
+    Features.push_back("+reserve-g4");
+
+  if (Args.hasArg(options::OPT_ffixed_g5))
+    Features.push_back("+reserve-g5");
+
+  if (Args.hasArg(options::OPT_ffixed_g6))
+    Features.push_back("+reserve-g6");
+
+  if (Args.hasArg(options::OPT_ffixed_g7))
+    Features.push_back("+reserve-g7");
+
+  if (Args.hasArg(options::OPT_ffixed_o0))
+    Features.push_back("+reserve-o0");
+
+  if (Args.hasArg(options::OPT_ffixed_o1))
+    Features.push_back("+reserve-o1");
+
+  if (Args.hasArg(options::OPT_ffixed_o2))
+    Features.push_back("+reserve-o2");
+
+  if (Args.hasArg(options::OPT_ffixed_o3))
+    Features.push_back("+reserve-o3");
+
+  if (Args.hasArg(options::OPT_ffixed_o4))
+    Features.push_back("+reserve-o4");
+
+  if (Args.hasArg(options::OPT_ffixed_o5))
+    Features.push_back("+reserve-o5");
+
+  if (Args.hasArg(options::OPT_ffixed_l0))
+    Features.push_back("+reserve-l0");
+
+  if (Args.hasArg(options::OPT_ffixed_l1))
+    Features.push_back("+reserve-l1");
+
+  if (Args.hasArg(options::OPT_ffixed_l2))
+    Features.push_back("+reserve-l2");
+
+  if (Args.hasArg(options::OPT_ffixed_l3))
+    Features.push_back("+reserve-l3");
+
+  if (Args.hasArg(options::OPT_ffixed_l4))
+    Features.push_back("+reserve-l4");
+
+  if (Args.hasArg(options::OPT_ffixed_l5))
+    Features.push_back("+reserve-l5");
+
+  if (Args.hasArg(options::OPT_ffixed_l6))
+    Features.push_back("+reserve-l6");
+
+  if (Args.hasArg(options::OPT_ffixed_l7))
+    Features.push_back("+reserve-l7");
+
+  if (Args.hasArg(options::OPT_ffixed_i0))
+    Features.push_back("+reserve-i0");
+
+  if (Args.hasArg(options::OPT_ffixed_i1))
+    Features.push_back("+reserve-i1");
+
+  if (Args.hasArg(options::OPT_ffixed_i2))
+    Features.push_back("+reserve-i2");
+
+  if (Args.hasArg(options::OPT_ffixed_i3))
+    Features.push_back("+reserve-i3");
+
+  if (Args.hasArg(options::OPT_ffixed_i4))
+    Features.push_back("+reserve-i4");
+
+  if (Args.hasArg(options::OPT_ffixed_i5))
+    Features.push_back("+reserve-i5");
 }
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index b904e0e56d9eb3..57391979887078 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2515,7 +2515,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
         parseChildBlock();
       break;
     case tok::r_paren:
-      if (!MightBeStmtExpr &&
+      if (!MightBeStmtExpr && !Line->InMacroBody &&
           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
         const auto *Prev = LeftParen->Previous;
         const auto *Next = Tokens->peekNextToken();
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 1fadd8039462d4..321c11e55c14e3 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -800,11 +800,12 @@ void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
   BitsUnpacker EnumDeclBits(Record.readInt());
   ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8));
   ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8));
+  bool ShouldSkipCheckingODR = EnumDeclBits.getNextBit();
   ED->setScoped(EnumDeclBits.getNextBit());
   ED->setScopedUsingClassTag(EnumDeclBits.getNextBit());
   ED->setFixed(EnumDeclBits.getNextBit());
 
-  if (!shouldSkipCheckingODR(ED)) {
+  if (!ShouldSkipCheckingODR) {
     ED->setHasODRHash(true);
     ED->ODRHash = Record.readInt();
   }
@@ -1073,6 +1074,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
 
   FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3));
   FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3));
+  bool ShouldSkipCheckingODR = FunctionDeclBits.getNextBit();
   FD->setInlineSpecified(FunctionDeclBits.getNextBit());
   FD->setImplicitlyInline(FunctionDeclBits.getNextBit());
   FD->setHasSkippedBody(FunctionDeclBits.getNextBit());
@@ -1102,7 +1104,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
   if (FD->isExplicitlyDefaulted())
     FD->setDefaultLoc(readSourceLocation());
 
-  if (!shouldSkipCheckingODR(FD)) {
+  if (!ShouldSkipCheckingODR) {
     FD->ODRHash = Record.readInt();
     FD->setHasODRHash(true);
   }
@@ -1973,6 +1975,8 @@ void ASTDeclReader::ReadCXXDefinitionData(
 
   BitsUnpacker CXXRecordDeclBits = Record.readInt();
 
+  bool ShouldSkipCheckingODR = CXXRecordDeclBits.getNextBit();
+
 #define FIELD(Name, Width, Merge)                                              \
   if (!CXXRecordDeclBits.canGetNextNBits(Width))                         \
     CXXRecordDeclBits.updateValue(Record.readInt());                           \
@@ -1982,7 +1986,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
 #undef FIELD
 
   // We only perform ODR checks for decls not in GMF.
-  if (!shouldSkipCheckingODR(D)) {
+  if (!ShouldSkipCheckingODR) {
     // Note: the caller has deserialized the IsLambda bit already.
     Data.ODRHash = Record.readInt();
     Data.HasODRHash = true;
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 3b79a9238d1af3..73018c1170d8f2 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -6010,6 +6010,9 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
 
   BitsPacker DefinitionBits;
 
+  bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D);
+  DefinitionBits.addBit(ShouldSkipCheckingODR);
+
 #define FIELD(Name, Width, Merge)                                              \
   if (!DefinitionBits.canWriteNextNBits(Width)) {                              \
     Record->push_back(DefinitionBits);                                         \
@@ -6023,11 +6026,10 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
   Record->push_back(DefinitionBits);
 
   // We only perform ODR checks for decls not in GMF.
-  if (!shouldSkipCheckingODR(D)) {
+  if (!ShouldSkipCheckingODR)
     // getODRHash will compute the ODRHash if it has not been previously
     // computed.
     Record->push_back(D->getODRHash());
-  }
 
   bool ModulesDebugInfo =
       Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType();
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index f224075643e998..e73800100e3ccf 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -488,13 +488,15 @@ void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
   BitsPacker EnumDeclBits;
   EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8);
   EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8);
+  bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D);
+  EnumDeclBits.addBit(ShouldSkipCheckingODR);
   EnumDeclBits.addBit(D->isScoped());
   EnumDeclBits.addBit(D->isScopedUsingClassTag());
   EnumDeclBits.addBit(D->isFixed());
   Record.push_back(EnumDeclBits);
 
   // We only perform ODR checks for decls not in GMF.
-  if (!shouldSkipCheckingODR(D))
+  if (!ShouldSkipCheckingODR)
     Record.push_back(D->getODRHash());
 
   if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) {
@@ -678,6 +680,8 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   // FIXME: stable encoding
   FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3);
   FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3);
+  bool ShouldSkipCheckingODR = shouldSkipCheckingODR(D);
+  FunctionDeclBits.addBit(ShouldSkipCheckingODR);
   FunctionDeclBits.addBit(D->isInlineSpecified());
   FunctionDeclBits.addBit(D->isInlined());
   FunctionDeclBits.addBit(D->hasSkippedBody());
@@ -704,7 +708,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
     Record.AddSourceLocation(D->getDefaultLoc());
 
   // We only perform ODR checks for decls not in GMF.
-  if (!shouldSkipCheckingODR(D))
+  if (!ShouldSkipCheckingODR)
     Record.push_back(D->getODRHash());
 
   if (D->isDefaulted()) {
@@ -2137,12 +2141,13 @@ getFunctionDeclAbbrev(serialization::DeclCode Code) {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS
   Abv->Add(BitCodeAbbrevOp(
       BitCodeAbbrevOp::Fixed,
-      27)); // Packed Function Bits: StorageClass, Inline, InlineSpecified,
+      28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified,
             // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto,
             // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted,
             // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr,
             // UsesSEHTry, SkippedBody, MultiVersion, LateParsed,
-            // FriendConstraintRefersToEnclosingTemplate, Linkage
+            // FriendConstraintRefersToEnclosingTemplate, Linkage,
+            // ShouldSkipCheckingODR
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // LocEnd
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash
   // This Array slurps the rest of the record. Fortunately we want to encode
@@ -2269,7 +2274,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // AddTypeRef
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // IntegerType
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // getPromotionType
-  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 19)); // Enum Decl Bits
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // InstantiatedMembEnum
   // DC
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
index 75e9a8c46b8769..8ddee560b11da4 100644
--- a/clang/test/CodeGen/aarch64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -95,3 +95,11 @@ void test_reduced_gpr_constraints(int var32, long var64) {
 // CHECK: [[ARG2:%.+]] = load i64, ptr
 // CHECK: call void asm sideeffect "add x0, x0, $0", "@3Ucj,~{x0}"(i64 [[ARG2]])
 }
+
+void test_sme_constraints(){
+  asm("movt zt0[3, mul vl], z0" : : : "za");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+
+  asm("movt zt0[3, mul vl], z0" : : : "zt0");
+// CHECK: call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+}
\ No newline at end of file
diff --git a/clang/test/Driver/sparc-fixed-register.c b/clang/test/Driver/sparc-fixed-register.c
new file mode 100644
index 00000000000000..24880b9c9d86fd
--- /dev/null
+++ b/clang/test/Driver/sparc-fixed-register.c
@@ -0,0 +1,181 @@
+// RUN: %clang --target=sparc-none-gnu -ffixed-g1 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G1 < %t %s
+// CHECK-FIXED-G1: "-target-feature" "+reserve-g1"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g2 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G2 < %t %s
+// CHECK-FIXED-G2: "-target-feature" "+reserve-g2"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g3 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G3 < %t %s
+// CHECK-FIXED-G3: "-target-feature" "+reserve-g3"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g4 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G4 < %t %s
+// CHECK-FIXED-G4: "-target-feature" "+reserve-g4"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g5 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G5 < %t %s
+// CHECK-FIXED-G5: "-target-feature" "+reserve-g5"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g6 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G6 < %t %s
+// CHECK-FIXED-G6: "-target-feature" "+reserve-g6"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-g7 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-G7 < %t %s
+// CHECK-FIXED-G7: "-target-feature" "+reserve-g7"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o0 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O0 < %t %s
+// CHECK-FIXED-O0: "-target-feature" "+reserve-o0"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o1 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O1 < %t %s
+// CHECK-FIXED-O1: "-target-feature" "+reserve-o1"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o2 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O2 < %t %s
+// CHECK-FIXED-O2: "-target-feature" "+reserve-o2"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o3 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O3 < %t %s
+// CHECK-FIXED-O3: "-target-feature" "+reserve-o3"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o4 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O4 < %t %s
+// CHECK-FIXED-O4: "-target-feature" "+reserve-o4"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-o5 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-O5 < %t %s
+// CHECK-FIXED-O5: "-target-feature" "+reserve-o5"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l0 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L0 < %t %s
+// CHECK-FIXED-L0: "-target-feature" "+reserve-l0"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l1 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L1 < %t %s
+// CHECK-FIXED-L1: "-target-feature" "+reserve-l1"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l2 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L2 < %t %s
+// CHECK-FIXED-L2: "-target-feature" "+reserve-l2"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l3 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L3 < %t %s
+// CHECK-FIXED-L3: "-target-feature" "+reserve-l3"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l4 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L4 < %t %s
+// CHECK-FIXED-L4: "-target-feature" "+reserve-l4"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l5 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L5 < %t %s
+// CHECK-FIXED-L5: "-target-feature" "+reserve-l5"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l6 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L6 < %t %s
+// CHECK-FIXED-L6: "-target-feature" "+reserve-l6"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-l7 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-L7 < %t %s
+// CHECK-FIXED-L7: "-target-feature" "+reserve-l7"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i0 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I0 < %t %s
+// CHECK-FIXED-I0: "-target-feature" "+reserve-i0"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i1 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I1 < %t %s
+// CHECK-FIXED-I1: "-target-feature" "+reserve-i1"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i2 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I2 < %t %s
+// CHECK-FIXED-I2: "-target-feature" "+reserve-i2"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i3 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I3 < %t %s
+// CHECK-FIXED-I3: "-target-feature" "+reserve-i3"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i4 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I4 < %t %s
+// CHECK-FIXED-I4: "-target-feature" "+reserve-i4"
+
+// RUN: %clang --target=sparc-none-gnu -ffixed-i5 -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FIXED-I5 < %t %s
+// CHECK-FIXED-I5: "-target-feature" "+reserve-i5"
+
+// Test multiple of reserve-* options together.
+// RUN: %clang --target=sparc-none-gnu \
+// RUN: -ffixed-g1 \
+// RUN: -ffixed-o2 \
+// RUN: -ffixed-l3 \
+// RUN: -ffixed-i4 \
+// RUN: -### %s 2> %t
+// RUN: FileCheck \
+// RUN: --check-prefix=CHECK-FIXED-G1 \
+// RUN: --check-prefix=CHECK-FIXED-O2 \
+// RUN: --check-prefix=CHECK-FIXED-L3 \
+// RUN: --check-prefix=CHECK-FIXED-I4 \
+// RUN: < %t %s
+
+// Test all reserve-* options together.
+// RUN: %clang --target=sparc-none-gnu \
+// RUN: -ffixed-g1 \
+// RUN: -ffixed-g2 \
+// RUN: -ffixed-g3 \
+// RUN: -ffixed-g4 \
+// RUN: -ffixed-g5 \
+// RUN: -ffixed-g6 \
+// RUN: -ffixed-g7 \
+// RUN: -ffixed-o0 \
+// RUN: -ffixed-o1 \
+// RUN: -ffixed-o2 \
+// RUN: -ffixed-o3 \
+// RUN: -ffixed-o4 \
+// RUN: -ffixed-o5 \
+// RUN: -ffixed-l0 \
+// RUN: -ffixed-l1 \
+// RUN: -ffixed-l2 \
+// RUN: -ffixed-l3 \
+// RUN: -ffixed-l4 \
+// RUN: -ffixed-l5 \
+// RUN: -ffixed-l6 \
+// RUN: -ffixed-l7 \
+// RUN: -ffixed-i0 \
+// RUN: -ffixed-i1 \
+// RUN: -ffixed-i2 \
+// RUN: -ffixed-i3 \
+// RUN: -ffixed-i4 \
+// RUN: -ffixed-i5 \
+// RUN: -### %s 2> %t
+// RUN: FileCheck \
+// RUN: --check-prefix=CHECK-FIXED-G1 \
+// RUN: --check-prefix=CHECK-FIXED-G2 \
+// RUN: --check-prefix=CHECK-FIXED-G3 \
+// RUN: --check-prefix=CHECK-FIXED-G4 \
+// RUN: --check-prefix=CHECK-FIXED-G5 \
+// RUN: --check-prefix=CHECK-FIXED-G6 \
+// RUN: --check-prefix=CHECK-FIXED-G7 \
+// RUN: --check-prefix=CHECK-FIXED-O0 \
+// RUN: --check-prefix=CHECK-FIXED-O1 \
+// RUN: --check-prefix=CHECK-FIXED-O2 \
+// RUN: --check-prefix=CHECK-FIXED-O3 \
+// RUN: --check-prefix=CHECK-FIXED-O4 \
+// RUN: --check-prefix=CHECK-FIXED-O5 \
+// RUN: --check-prefix=CHECK-FIXED-L0 \
+// RUN: --check-prefix=CHECK-FIXED-L1 \
+// RUN: --check-prefix=CHECK-FIXED-L2 \
+// RUN: --check-prefix=CHECK-FIXED-L3 \
+// RUN: --check-prefix=CHECK-FIXED-L4 \
+// RUN: --check-prefix=CHECK-FIXED-L5 \
+// RUN: --check-prefix=CHECK-FIXED-L6 \
+// RUN: --check-prefix=CHECK-FIXED-L7 \
+// RUN: --check-prefix=CHECK-FIXED-I0 \
+// RUN: --check-prefix=CHECK-FIXED-I1 \
+// RUN: --check-prefix=CHECK-FIXED-I2 \
+// RUN: --check-prefix=CHECK-FIXED-I3 \
+// RUN: --check-prefix=CHECK-FIXED-I4 \
+// RUN: --check-prefix=CHECK-FIXED-I5 \
+// RUN: < %t %s
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index a471e36f8d6825..0beba12dda62ae 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26856,6 +26856,7 @@ TEST_F(FormatTest, RemoveParentheses) {
   EXPECT_EQ(Style.RemoveParentheses, FormatStyle::RPS_Leave);
 
   Style.RemoveParentheses = FormatStyle::RPS_MultipleParentheses;
+  verifyFormat("#define Foo(...) foo((__VA_ARGS__))", Style);
   verifyFormat("int x __attribute__((aligned(16))) = 0;", Style);
   verifyFormat("decltype((foo->bar)) baz;", Style);
   verifyFormat("class __declspec(dllimport) X {};",
@@ -26890,6 +26891,7 @@ TEST_F(FormatTest, RemoveParentheses) {
   verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style);
 
   Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement;
+  verifyFormat("#define Return0 return (0);", Style);
   verifyFormat("return 0;", "return (0);", Style);
   verifyFormat("co_return 0;", "co_return ((0));", Style);
   verifyFormat("return 0;", "return (((0)));", Style);
diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt
index 10d7de970c643d..e7eebd0cb98239 100644
--- a/clang/unittests/Serialization/CMakeLists.txt
+++ b/clang/unittests/Serialization/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_unittest(SerializationTests
   InMemoryModuleCacheTest.cpp
   ModuleCacheTest.cpp
   NoCommentsTest.cpp
+  PreambleInNamedModulesTest.cpp
   SourceLocationEncodingTest.cpp
   VarDeclConstantInitTest.cpp
   )
diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp
new file mode 100644
index 00000000000000..d26e1cb633654f
--- /dev/null
+++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp
@@ -0,0 +1,132 @@
+//===- unittests/Serialization/PreambleInNamedModulesTest.cpp -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/PrecompiledPreamble.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+
+namespace {
+
+class PreambleInNamedModulesTest : public ::testing::Test {
+  void SetUp() override {
+    ASSERT_FALSE(sys::fs::createUniqueDirectory("modules-test", TestDir));
+  }
+
+  void TearDown() override { sys::fs::remove_directories(TestDir); }
+
+public:
+  using PathType = SmallString<256>;
+
+  PathType TestDir;
+
+  void addFile(StringRef Path, StringRef Contents, PathType &AbsPath) {
+    ASSERT_FALSE(sys::path::is_absolute(Path));
+
+    AbsPath = TestDir;
+    sys::path::append(AbsPath, Path);
+
+    ASSERT_FALSE(
+        sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath)));
+
+    std::error_code EC;
+    llvm::raw_fd_ostream OS(AbsPath, EC);
+    ASSERT_FALSE(EC);
+    OS << Contents;
+  }
+
+  void addFile(StringRef Path, StringRef Contents) {
+    PathType UnusedAbsPath;
+    addFile(Path, Contents, UnusedAbsPath);
+  }
+};
+
+// Testing that the use of Preamble in named modules can work basically.
+// See https://github.com/llvm/llvm-project/issues/80570
+TEST_F(PreambleInNamedModulesTest, BasicTest) {
+  addFile("foo.h", R"cpp(
+enum class E {
+    A,
+    B,
+    C,
+    D
+};
+  )cpp");
+
+  PathType MainFilePath;
+  addFile("A.cppm", R"cpp(
+module;
+#include "foo.h"
+export module A;
+export using ::E;
+  )cpp",
+          MainFilePath);
+
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+      CompilerInstance::createDiagnostics(new DiagnosticOptions());
+  IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
+      llvm::vfs::createPhysicalFileSystem();
+
+  CreateInvocationOptions CIOpts;
+  CIOpts.Diags = Diags;
+  CIOpts.VFS = VFS;
+
+  const char *Args[] = {"clang++", "-std=c++20", "-working-directory",
+                        TestDir.c_str(), MainFilePath.c_str()};
+  std::shared_ptr<CompilerInvocation> Invocation =
+      createInvocation(Args, CIOpts);
+  ASSERT_TRUE(Invocation);
+
+  llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> ContentsBuffer =
+      llvm::MemoryBuffer::getFile(MainFilePath, /*IsText=*/true);
+  EXPECT_TRUE(ContentsBuffer);
+  std::unique_ptr<MemoryBuffer> Buffer = std::move(*ContentsBuffer);
+
+  PreambleBounds Bounds =
+      ComputePreambleBounds(Invocation->getLangOpts(), *Buffer, 0);
+
+  PreambleCallbacks Callbacks;
+  llvm::ErrorOr<PrecompiledPreamble> BuiltPreamble = PrecompiledPreamble::Build(
+      *Invocation, Buffer.get(), Bounds, *Diags, VFS,
+      std::make_shared<PCHContainerOperations>(),
+      /*StoreInMemory=*/false, /*StoragePath=*/TestDir, Callbacks);
+
+  ASSERT_FALSE(Diags->hasErrorOccurred());
+
+  EXPECT_TRUE(BuiltPreamble);
+  EXPECT_TRUE(BuiltPreamble->CanReuse(*Invocation, *Buffer, Bounds, *VFS));
+  BuiltPreamble->OverridePreamble(*Invocation, VFS, Buffer.get());
+
+  auto Clang = std::make_unique<CompilerInstance>(
+      std::make_shared<PCHContainerOperations>());
+  Clang->setInvocation(std::move(Invocation));
+  Clang->setDiagnostics(Diags.get());
+
+  if (auto VFSWithRemapping = createVFSFromCompilerInvocation(
+          Clang->getInvocation(), Clang->getDiagnostics(), VFS))
+    VFS = VFSWithRemapping;
+
+  Clang->createFileManager(VFS);
+  EXPECT_TRUE(Clang->createTarget());
+
+  Buffer.release();
+
+  SyntaxOnlyAction Action;
+  EXPECT_TRUE(Clang->ExecuteAction(Action));
+  EXPECT_FALSE(Clang->getDiagnosticsPtr()->hasErrorOccurred());
+}
+
+} // namespace
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 867ae73f0d3b27..f3b457d786e6bd 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -677,6 +677,7 @@ static void initializeProfileForContinuousMode(void) {
       PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
                "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
                (uint64_t)CurrentFileOffset, PageSize);
+      fclose(File);
       return;
     }
     if (writeProfileWithFileObject(Filename, File) != 0) {
@@ -692,6 +693,8 @@ static void initializeProfileForContinuousMode(void) {
 
   if (doMerging()) {
     lprofUnlockFileHandle(File);
+  }
+  if (File != NULL) {
     fclose(File);
   }
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c
index 9f46a98d78ac4e..002bec164d7e85 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformAIX.c
@@ -195,6 +195,8 @@ static const int dummy_name[0] COMPILER_RT_SECTION(
     COMPILER_RT_SEG INSTR_PROF_NAME_SECT_NAME);
 static int dummy_vnds[0] COMPILER_RT_SECTION(
     COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
+static int dummy_orderfile[0] COMPILER_RT_SECTION(
+    COMPILER_RT_SEG INSTR_PROF_ORDERFILE_SECT_NAME);
 
 // To avoid GC'ing of the dummy variables by the linker, reference them in an
 // array and reference the array in the runtime registration code
@@ -206,7 +208,7 @@ static int dummy_vnds[0] COMPILER_RT_SECTION(
 COMPILER_RT_VISIBILITY
 void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_bits,
                                (void *)&dummy_data, (void *)&dummy_name,
-                               (void *)&dummy_vnds};
+                               (void *)&dummy_vnds, (void *)&dummy_orderfile};
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
diff --git a/compiler-rt/test/profile/AIX/bexpfull-pgo.c b/compiler-rt/test/profile/AIX/bexpfull-pgo.c
new file mode 100644
index 00000000000000..f48242ec6bfeaa
--- /dev/null
+++ b/compiler-rt/test/profile/AIX/bexpfull-pgo.c
@@ -0,0 +1,7 @@
+// RUN: %clang_pgogen %s -bexpall
+// RUN: %clang_pgogen %s -bexpfull
+
+#include <string.h>
+int ar[10];
+int n;
+int main() { memcpy(ar, ar + 1, n); };
diff --git a/libcxx/docs/Modules.rst b/libcxx/docs/Modules.rst
index 533c3fbd2a1eea..ee2b81d3b9e7ca 100644
--- a/libcxx/docs/Modules.rst
+++ b/libcxx/docs/Modules.rst
@@ -218,9 +218,13 @@ Building this project is done with the following steps, assuming the files
 
   $ mkdir build
   $ cmake -G Ninja -S . -B build -DCMAKE_CXX_COMPILER=<path-to-compiler> -DLIBCXX_BUILD=<build>
+  $ ninja -j1 std -C build
   $ ninja -C build
   $ build/main
 
+.. note:: The ``std`` dependencies of ``std.compat`` is not always resolved when
+          building the ``std`` target using multiple jobs.
+
 .. warning:: ``<path-to-compiler>`` should point point to the real binary and
              not to a symlink.
 
diff --git a/libcxx/include/print b/libcxx/include/print
index 7f2b5bac3dcf61..543a540ee4f27d 100644
--- a/libcxx/include/print
+++ b/libcxx/include/print
@@ -32,6 +32,7 @@ namespace std {
 */
 
 #include <__assert> // all public C++ headers provide the assertion handler
+#include <__availability>
 #include <__concepts/same_as.h>
 #include <__config>
 #include <__system_error/system_error.h>
@@ -43,10 +44,6 @@ namespace std {
 #include <string_view>
 #include <version>
 
-#if __has_include(<unistd.h>)
-#  include <unistd.h>
-#endif
-
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -68,7 +65,8 @@ _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream);
 // Note the function is only implemented on the Windows platform.
 _LIBCPP_EXPORTED_FROM_ABI void __write_to_windows_console(FILE* __stream, wstring_view __view);
 #  endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
-
+#elif __has_include(<unistd.h>)
+_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream);
 #endif // _LIBCPP_WIN32API
 
 #if _LIBCPP_STD_VER >= 23
@@ -195,15 +193,17 @@ inline constexpr bool __use_unicode_execution_charset = _MSVC_EXECUTION_CHARACTE
 inline constexpr bool __use_unicode_execution_charset = true;
 #  endif
 
-_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal(FILE* __stream) {
+_LIBCPP_HIDE_FROM_ABI inline bool __is_terminal([[maybe_unused]] FILE* __stream) {
   // The macro _LIBCPP_TESTING_PRINT_IS_TERMINAL is used to change
   // the behavior in the test. This is not part of the public API.
 #  ifdef _LIBCPP_TESTING_PRINT_IS_TERMINAL
   return _LIBCPP_TESTING_PRINT_IS_TERMINAL(__stream);
+#  elif _LIBCPP_AVAILABILITY_HAS_PRINT == 0
+  return false;
 #  elif defined(_LIBCPP_WIN32API)
   return std::__is_windows_terminal(__stream);
 #  elif __has_include(<unistd.h>)
-  return isatty(fileno(__stream));
+  return std::__is_posix_terminal(__stream);
 #  else
 #    error "Provide a way to determine whether a FILE* is a terminal"
 #  endif
diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT
index 1179c253f18c8f..7ff604959f4d5c 100644
--- a/libcxx/lib/abi/CHANGELOG.TXT
+++ b/libcxx/lib/abi/CHANGELOG.TXT
@@ -16,6 +16,14 @@ New entries should be added directly below the "Version" header.
 Version 18.0
 ------------
 
+* [libc++] Moves is_terminal to the dylib
+
+  The patch moves the POSIX implementation of is_terminal to the dylib. This is
+  needed to avoid using <unistd.h> in public headers.
+
+  All platforms
+  Symbol added: _ZNSt6__ndk119__is_posix_terminalEP7__sFILE
+
 * [libc++abi] Implement __cxa_init_primary_exception and use it to optimize std::make_exception_ptr (#65534)
 
   This patch implements __cxa_init_primary_exception, an extension to the Itanium C++ ABI.
diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index c2fea4d8adb420..2064f45bf8c084 100644
--- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1495,6 +1495,7 @@
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
index a60f099b532052..fec3a4505a0c6d 100644
--- a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1176,6 +1176,7 @@
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
index a159ff52218667..e52cf98dd4c4f1 100644
--- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -534,6 +534,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
index 5749a7520f9bac..52a04706ddf20b 100644
--- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -534,6 +534,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP4FILE', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index e827114f169197..bced6b2ea81ba5 100644
--- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1495,6 +1495,7 @@
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '__ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
index f4077adc074e0a..efa2189e9c9287 100644
--- a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1176,6 +1176,7 @@
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNSt6__ndk119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt6__ndk119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
index e3d3fcb35d8403..ebda5b0dfba57d 100644
--- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1190,6 +1190,7 @@
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP7__sFILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
index 16923301d2548e..6432ad3be35859 100644
--- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -1188,6 +1188,7 @@
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
index 2380ffb100de97..1fe84e17b3f7f0 100644
--- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
@@ -1159,6 +1159,7 @@
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutex8try_lockEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC1Ev', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__118shared_timed_mutexC2Ev', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNSt3__119__is_posix_terminalEP8_IO_FILE', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base11lock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base13unlock_sharedEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNSt3__119__shared_mutex_base15try_lock_sharedEv', 'type': 'FUNC'}
diff --git a/libcxx/modules/CMakeLists.txt b/libcxx/modules/CMakeLists.txt
index 0388c048dacb8b..0dea8cfca94ac3 100644
--- a/libcxx/modules/CMakeLists.txt
+++ b/libcxx/modules/CMakeLists.txt
@@ -137,6 +137,25 @@ set(LIBCXX_MODULE_STD_COMPAT_SOURCES
   std.compat/cwctype.inc
 )
 
+# TODO MODULES the CMakeLists.txt in the build directory is only temporary.
+# This allows using as available in the build directory. Once build systems
+# have proper support for the installed files this will be removed.
+if ("${LIBCXX_GENERATED_INCLUDE_DIR}" STREQUAL "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR}")
+  # This typically happens when the target is not installed.
+  set(LIBCXX_CONFIGURED_INCLUDE_DIRS "${LIBCXX_GENERATED_INCLUDE_DIR}")
+else()
+  # It's important that the arch directory be included first so that its header files
+  # which interpose on the default include dir be included instead of the default ones.
+  set(LIBCXX_CONFIGURED_INCLUDE_DIRS
+    "${LIBCXX_GENERATED_INCLUDE_TARGET_DIR};${LIBCXX_GENERATED_INCLUDE_DIR}"
+  )
+endif()
+configure_file(
+  "CMakeLists.txt.in"
+  "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt"
+  @ONLY
+)
+
 set(LIBCXX_MODULE_STD_INCLUDE_SOURCES)
 foreach(file ${LIBCXX_MODULE_STD_SOURCES})
   set(
@@ -166,6 +185,7 @@ configure_file(
 )
 
 set(_all_modules)
+list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/CMakeLists.txt")
 list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.cppm")
 list(APPEND _all_modules "${LIBCXX_GENERATED_MODULE_DIR}/std.compat.cppm")
 foreach(file ${LIBCXX_MODULE_STD_SOURCES} ${LIBCXX_MODULE_STD_COMPAT_SOURCES})
diff --git a/libcxx/modules/CMakeLists.txt.in b/libcxx/modules/CMakeLists.txt.in
new file mode 100644
index 00000000000000..e332d70cc16333
--- /dev/null
+++ b/libcxx/modules/CMakeLists.txt.in
@@ -0,0 +1,88 @@
+cmake_minimum_required(VERSION 3.26)
+
+project(libc++-modules LANGUAGES CXX)
+
+# Enable CMake's module support
+if(CMAKE_VERSION VERSION_LESS "3.28.0")
+  if(CMAKE_VERSION VERSION_LESS "3.27.0")
+    set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "2182bf5c-ef0d-489a-91da-49dbc3090d2a")
+  else()
+    set(CMAKE_EXPERIMENTAL_CXX_MODULE_CMAKE_API "aa1f7df0-828a-4fcd-9afc-2dc80491aca7")
+  endif()
+  set(CMAKE_EXPERIMENTAL_CXX_MODULE_DYNDEP 1)
+else()
+  cmake_policy(VERSION 3.28)
+endif()
+
+# Default to C++ extensions being off. Libc++'s modules support have trouble
+# with extensions right now.
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Propagates the CMake options to the modules.
+#
+# This uses the std module hard-coded since the std.compat module does not
+# depend on these flags.
+macro(compile_define_if_not condition def)
+  if (NOT ${condition})
+    target_compile_definitions(std PRIVATE ${def})
+  endif()
+endmacro()
+macro(compile_define_if condition def)
+  if (${condition})
+    target_compile_definitions(std PRIVATE ${def})
+  endif()
+endmacro()
+
+### STD
+
+add_library(std)
+target_sources(std
+  PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES
+    std.cppm
+)
+
+target_include_directories(std SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@)
+
+if (NOT @LIBCXX_ENABLE_EXCEPTIONS@)
+  target_compile_options(std PUBLIC -fno-exceptions)
+endif()
+
+target_compile_options(std
+  PUBLIC
+    -nostdinc++
+    -Wno-reserved-module-identifier
+    -Wno-reserved-user-defined-literal
+    @LIBCXX_COMPILE_FLAGS@
+)
+set_target_properties(std
+  PROPERTIES
+    OUTPUT_NAME   "c++std"
+)
+
+### STD.COMPAT
+
+add_library(std.compat)
+target_sources(std.compat
+  PUBLIC FILE_SET cxx_modules TYPE CXX_MODULES FILES
+    std.compat.cppm
+)
+
+target_include_directories(std.compat SYSTEM PRIVATE @LIBCXX_CONFIGURED_INCLUDE_DIRS@)
+
+if (NOT @LIBCXX_ENABLE_EXCEPTIONS@)
+  target_compile_options(std.compat PUBLIC -fno-exceptions)
+endif()
+
+target_compile_options(std.compat
+  PUBLIC
+    -nostdinc++
+    -Wno-reserved-module-identifier
+    -Wno-reserved-user-defined-literal
+	-fmodule-file=std=${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/std.dir/std.pcm
+    @LIBCXX_COMPILE_FLAGS@
+)
+set_target_properties(std.compat
+  PROPERTIES
+    OUTPUT_NAME   "c++std.compat"
+)
+add_dependencies(std.compat std)
diff --git a/libcxx/src/print.cpp b/libcxx/src/print.cpp
index 3692187a5954a3..8fa59fdd097bcd 100644
--- a/libcxx/src/print.cpp
+++ b/libcxx/src/print.cpp
@@ -8,22 +8,26 @@
 
 #include <__config>
 
-#if defined(_LIBCPP_WIN32API)
+#include <cstdlib>
+#include <print>
+
+#include <__system_error/system_error.h>
 
-#  include <cstdlib>
-#  include <print>
+#include "filesystem/error.h"
 
+#if defined(_LIBCPP_WIN32API)
 #  define WIN32_LEAN_AND_MEAN
 #  define NOMINMAX
 #  include <io.h>
 #  include <windows.h>
-
-#  include <__system_error/system_error.h>
-
-#  include "filesystem/error.h"
+#elif __has_include(<unistd.h>)
+#  include <unistd.h>
+#endif
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+#if defined(_LIBCPP_WIN32API)
+
 _LIBCPP_EXPORTED_FROM_ABI bool __is_windows_terminal(FILE* __stream) {
   // Note the Standard does this in one call, but it's unclear whether
   // an invalid handle is allowed when calling GetConsoleMode.
@@ -52,6 +56,9 @@ __write_to_windows_console([[maybe_unused]] FILE* __stream, [[maybe_unused]] wst
 }
 #  endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
 
-_LIBCPP_END_NAMESPACE_STD
+#elif __has_include(<unistd.h>) // !_LIBCPP_WIN32API
 
-#endif // !_LIBCPP_WIN32API
+_LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream) { return isatty(fileno(__stream)); }
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ab2ec5b447d000..05fd38fb753fda 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -36,6 +36,8 @@ class LoongArch final : public TargetInfo {
   bool usesOnlyLowPageBits(RelType type) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  bool relaxOnce(int pass) const override;
+  void finalizeRelax(int passes) const override;
 };
 } // end anonymous namespace
 
@@ -465,8 +467,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
   case R_LARCH_TLS_GD_HI20:
     return R_TLSGD_GOT;
   case R_LARCH_RELAX:
-    // LoongArch linker relaxation is not implemented yet.
-    return R_NONE;
+    return config->relax ? R_RELAX_HINT : R_NONE;
+  case R_LARCH_ALIGN:
+    return R_RELAX_HINT;
 
   // Other known relocs that are explicitly unimplemented:
   //
@@ -659,6 +662,155 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
   }
 }
 
+static bool relax(InputSection &sec) {
+  const uint64_t secAddr = sec.getVA();
+  const MutableArrayRef<Relocation> relocs = sec.relocs();
+  auto &aux = *sec.relaxAux;
+  bool changed = false;
+  ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
+  uint64_t delta = 0;
+
+  std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);
+  aux.writes.clear();
+  for (auto [i, r] : llvm::enumerate(relocs)) {
+    const uint64_t loc = secAddr + r.offset - delta;
+    uint32_t &cur = aux.relocDeltas[i], remove = 0;
+    switch (r.type) {
+    case R_LARCH_ALIGN: {
+      const uint64_t addend =
+          r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
+      const uint64_t allBytes = (1 << (addend & 0xff)) - 4;
+      const uint64_t align = 1 << (addend & 0xff);
+      const uint64_t maxBytes = addend >> 8;
+      const uint64_t off = loc & (align - 1);
+      const uint64_t curBytes = off == 0 ? 0 : align - off;
+      // All bytes beyond the alignment boundary should be removed.
+      // If emit bytes more than max bytes to emit, remove all.
+      if (maxBytes != 0 && curBytes > maxBytes)
+        remove = allBytes;
+      else
+        remove = allBytes - curBytes;
+      // If we can't satisfy this alignment, we've found a bad input.
+      if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
+        errorOrWarn(getErrorLocation((const uint8_t *)loc) +
+                    "insufficient padding bytes for " + lld::toString(r.type) +
+                    ": " + Twine(allBytes) + " bytes available for " +
+                    "requested alignment of " + Twine(align) + " bytes");
+        remove = 0;
+      }
+      break;
+    }
+    }
+
+    // For all anchors whose offsets are <= r.offset, they are preceded by
+    // the previous relocation whose `relocDeltas` value equals `delta`.
+    // Decrease their st_value and update their st_size.
+    for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
+      if (sa[0].end)
+        sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
+      else
+        sa[0].d->value = sa[0].offset - delta;
+    }
+    delta += remove;
+    if (delta != cur) {
+      cur = delta;
+      changed = true;
+    }
+  }
+
+  for (const SymbolAnchor &a : sa) {
+    if (a.end)
+      a.d->size = a.offset - delta - a.d->value;
+    else
+      a.d->value = a.offset - delta;
+  }
+  // Inform assignAddresses that the size has changed.
+  if (!isUInt<32>(delta))
+    fatal("section size decrease is too large: " + Twine(delta));
+  sec.bytesDropped = delta;
+  return changed;
+}
+
+// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
+// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
+// shrinkage may reduce displacement and make more relocations eligible for
+// relaxation. Code shrinkage may increase displacement to a call/load/store
+// target at a higher fixed address, invalidating an earlier relaxation. Any
+// change in section sizes can have cascading effect and require another
+// relaxation pass.
+bool LoongArch::relaxOnce(int pass) const {
+  if (config->relocatable)
+    return false;
+
+  if (pass == 0)
+    initSymbolAnchors();
+
+  SmallVector<InputSection *, 0> storage;
+  bool changed = false;
+  for (OutputSection *osec : outputSections) {
+    if (!(osec->flags & SHF_EXECINSTR))
+      continue;
+    for (InputSection *sec : getInputSections(*osec, storage))
+      changed |= relax(*sec);
+  }
+  return changed;
+}
+
+void LoongArch::finalizeRelax(int passes) const {
+  log("relaxation passes: " + Twine(passes));
+  SmallVector<InputSection *, 0> storage;
+  for (OutputSection *osec : outputSections) {
+    if (!(osec->flags & SHF_EXECINSTR))
+      continue;
+    for (InputSection *sec : getInputSections(*osec, storage)) {
+      RelaxAux &aux = *sec->relaxAux;
+      if (!aux.relocDeltas)
+        continue;
+
+      MutableArrayRef<Relocation> rels = sec->relocs();
+      ArrayRef<uint8_t> old = sec->content();
+      size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
+      uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
+      uint64_t offset = 0;
+      int64_t delta = 0;
+      sec->content_ = p;
+      sec->size = newSize;
+      sec->bytesDropped = 0;
+
+      // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
+      // instructions for relaxed relocations.
+      for (size_t i = 0, e = rels.size(); i != e; ++i) {
+        uint32_t remove = aux.relocDeltas[i] - delta;
+        delta = aux.relocDeltas[i];
+        if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
+          continue;
+
+        // Copy from last location to the current relocated location.
+        const Relocation &r = rels[i];
+        uint64_t size = r.offset - offset;
+        memcpy(p, old.data() + offset, size);
+        p += size;
+        offset = r.offset + remove;
+      }
+      memcpy(p, old.data() + offset, old.size() - offset);
+
+      // Subtract the previous relocDeltas value from the relocation offset.
+      // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
+      // their r_offset by the same delta.
+      delta = 0;
+      for (size_t i = 0, e = rels.size(); i != e;) {
+        uint64_t cur = rels[i].offset;
+        do {
+          rels[i].offset -= delta;
+          if (aux.relocTypes[i] != R_LARCH_NONE)
+            rels[i].type = aux.relocTypes[i];
+        } while (++i != e && rels[i].offset == cur);
+        delta = aux.relocDeltas[i - 1];
+      }
+    }
+  }
+}
+
 TargetInfo *elf::getLoongArchTargetInfo() {
   static LoongArch target;
   return &target;
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 8ce92b4badfbd7..5fcab4d39d43a8 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -45,6 +45,7 @@ class RISCV final : public TargetInfo {
                 uint64_t val) const override;
   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
   bool relaxOnce(int pass) const override;
+  void finalizeRelax(int passes) const override;
 };
 
 } // end anonymous namespace
@@ -104,26 +105,6 @@ static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
          (extractBits(imm, 4, 0) << 7);
 }
 
-namespace {
-struct SymbolAnchor {
-  uint64_t offset;
-  Defined *d;
-  bool end; // true for the anchor of st_value+st_size
-};
-} // namespace
-
-struct elf::RISCVRelaxAux {
-  // This records symbol start and end offsets which will be adjusted according
-  // to the nearest relocDeltas element.
-  SmallVector<SymbolAnchor, 0> anchors;
-  // For relocations[i], the actual offset is
-  //   r_offset - (i ? relocDeltas[i-1] : 0).
-  std::unique_ptr<uint32_t[]> relocDeltas;
-  // For relocations[i], the actual type is relocTypes[i].
-  std::unique_ptr<RelType[]> relocTypes;
-  SmallVector<uint32_t, 0> writes;
-};
-
 RISCV::RISCV() {
   copyRel = R_RISCV_COPY;
   pltRel = R_RISCV_JUMP_SLOT;
@@ -695,13 +676,13 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
   }
 }
 
-static void initSymbolAnchors() {
+void elf::initSymbolAnchors() {
   SmallVector<InputSection *, 0> storage;
   for (OutputSection *osec : outputSections) {
     if (!(osec->flags & SHF_EXECINSTR))
       continue;
     for (InputSection *sec : getInputSections(*osec, storage)) {
-      sec->relaxAux = make<RISCVRelaxAux>();
+      sec->relaxAux = make<RelaxAux>();
       if (sec->relocs().size()) {
         sec->relaxAux->relocDeltas =
             std::make_unique<uint32_t[]>(sec->relocs().size());
@@ -948,7 +929,7 @@ bool RISCV::relaxOnce(int pass) const {
   return changed;
 }
 
-void elf::riscvFinalizeRelax(int passes) {
+void RISCV::finalizeRelax(int passes) const {
   llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
   log("relaxation passes: " + Twine(passes));
   SmallVector<InputSection *, 0> storage;
@@ -956,7 +937,7 @@ void elf::riscvFinalizeRelax(int passes) {
     if (!(osec->flags & SHF_EXECINSTR))
       continue;
     for (InputSection *sec : getInputSections(*osec, storage)) {
-      RISCVRelaxAux &aux = *sec->relaxAux;
+      RelaxAux &aux = *sec->relaxAux;
       if (!aux.relocDeltas)
         continue;
 
diff --git a/lld/ELF/Arch/SystemZ.cpp b/lld/ELF/Arch/SystemZ.cpp
new file mode 100644
index 00000000000000..d37db6877559dc
--- /dev/null
+++ b/lld/ELF/Arch/SystemZ.cpp
@@ -0,0 +1,607 @@
+//===- SystemZ.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OutputSections.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::ELF;
+using namespace lld;
+using namespace lld::elf;
+
+namespace {
+class SystemZ : public TargetInfo {
+public:
+  SystemZ();
+  int getTlsGdRelaxSkip(RelType type) const override;
+  RelExpr getRelExpr(RelType type, const Symbol &s,
+                     const uint8_t *loc) const override;
+  RelType getDynRel(RelType type) const override;
+  void writeGotHeader(uint8_t *buf) const override;
+  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
+  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
+  void writePltHeader(uint8_t *buf) const override;
+  void addPltHeaderSymbols(InputSection &isd) const override;
+  void writePlt(uint8_t *buf, const Symbol &sym,
+                uint64_t pltEntryAddr) const override;
+  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
+  RelExpr adjustGotPcExpr(RelType type, int64_t addend,
+                          const uint8_t *loc) const override;
+  bool relaxOnce(int pass) const override;
+  void relocate(uint8_t *loc, const Relocation &rel,
+                uint64_t val) const override;
+  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
+
+private:
+  void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
+};
+} // namespace
+
+SystemZ::SystemZ() {
+  copyRel = R_390_COPY;
+  gotRel = R_390_GLOB_DAT;
+  pltRel = R_390_JMP_SLOT;
+  relativeRel = R_390_RELATIVE;
+  iRelativeRel = R_390_IRELATIVE;
+  symbolicRel = R_390_64;
+  tlsGotRel = R_390_TLS_TPOFF;
+  tlsModuleIndexRel = R_390_TLS_DTPMOD;
+  tlsOffsetRel = R_390_TLS_DTPOFF;
+  gotHeaderEntriesNum = 3;
+  gotPltHeaderEntriesNum = 0;
+  gotEntrySize = 8;
+  pltHeaderSize = 32;
+  pltEntrySize = 32;
+  ipltEntrySize = 32;
+
+  // This "trap instruction" is used to fill gaps between sections.
+  // On SystemZ, the behavior of the GNU ld is to fill those gaps
+  // with nop instructions instead - and unfortunately the default
+  // glibc crt object files (used to) rely on that behavior since
+  // they use an alignment on the .init section fragments that causes
+  // gaps which must be filled with nops as they are being executed.
+  // Therefore, we provide a nop instruction as "trapInstr" here.
+  trapInstr = {0x07, 0x07, 0x07, 0x07};
+
+  defaultImageBase = 0x1000000;
+}
+
+RelExpr SystemZ::getRelExpr(RelType type, const Symbol &s,
+                            const uint8_t *loc) const {
+  switch (type) {
+  case R_390_NONE:
+    return R_NONE;
+  // Relocations targeting the symbol value.
+  case R_390_8:
+  case R_390_12:
+  case R_390_16:
+  case R_390_20:
+  case R_390_32:
+  case R_390_64:
+    return R_ABS;
+  case R_390_PC16:
+  case R_390_PC32:
+  case R_390_PC64:
+  case R_390_PC12DBL:
+  case R_390_PC16DBL:
+  case R_390_PC24DBL:
+  case R_390_PC32DBL:
+    return R_PC;
+  case R_390_GOTOFF16:
+  case R_390_GOTOFF: // a.k.a. R_390_GOTOFF32
+  case R_390_GOTOFF64:
+    return R_GOTREL;
+  // Relocations targeting the PLT associated with the symbol.
+  case R_390_PLT32:
+  case R_390_PLT64:
+  case R_390_PLT12DBL:
+  case R_390_PLT16DBL:
+  case R_390_PLT24DBL:
+  case R_390_PLT32DBL:
+    return R_PLT_PC;
+  case R_390_PLTOFF16:
+  case R_390_PLTOFF32:
+  case R_390_PLTOFF64:
+    return R_PLT_GOTREL;
+  // Relocations targeting the GOT entry associated with the symbol.
+  case R_390_GOTENT:
+    return R_GOT_PC;
+  case R_390_GOT12:
+  case R_390_GOT16:
+  case R_390_GOT20:
+  case R_390_GOT32:
+  case R_390_GOT64:
+    return R_GOT_OFF;
+  // Relocations targeting the GOTPLT entry associated with the symbol.
+  case R_390_GOTPLTENT:
+    return R_GOTPLT_PC;
+  case R_390_GOTPLT12:
+  case R_390_GOTPLT16:
+  case R_390_GOTPLT20:
+  case R_390_GOTPLT32:
+  case R_390_GOTPLT64:
+    return R_GOTPLT_GOTREL;
+  // Relocations targeting _GLOBAL_OFFSET_TABLE_.
+  case R_390_GOTPC:
+  case R_390_GOTPCDBL:
+    return R_GOTONLY_PC;
+  // TLS-related relocations.
+  case R_390_TLS_LOAD:
+    return R_NONE;
+  case R_390_TLS_GDCALL:
+    return R_TLSGD_PC;
+  case R_390_TLS_LDCALL:
+    return R_TLSLD_PC;
+  case R_390_TLS_GD32:
+  case R_390_TLS_GD64:
+    return R_TLSGD_GOT;
+  case R_390_TLS_LDM32:
+  case R_390_TLS_LDM64:
+    return R_TLSLD_GOT;
+  case R_390_TLS_LDO32:
+  case R_390_TLS_LDO64:
+    return R_DTPREL;
+  case R_390_TLS_LE32:
+  case R_390_TLS_LE64:
+    return R_TPREL;
+  case R_390_TLS_IE32:
+  case R_390_TLS_IE64:
+    return R_GOT;
+  case R_390_TLS_GOTIE12:
+  case R_390_TLS_GOTIE20:
+  case R_390_TLS_GOTIE32:
+  case R_390_TLS_GOTIE64:
+    return R_GOT_OFF;
+  case R_390_TLS_IEENT:
+    return R_GOT_PC;
+
+  default:
+    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
+          ") against symbol " + toString(s));
+    return R_NONE;
+  }
+}
+
+void SystemZ::writeGotHeader(uint8_t *buf) const {
+  // _GLOBAL_OFFSET_TABLE_[0] holds the value of _DYNAMIC.
+  // _GLOBAL_OFFSET_TABLE_[1] and [2] are reserved.
+  write64be(buf, mainPart->dynamic->getVA());
+}
+
+void SystemZ::writeGotPlt(uint8_t *buf, const Symbol &s) const {
+  write64be(buf, s.getPltVA() + 14);
+}
+
+void SystemZ::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
+  if (config->writeAddends)
+    write64be(buf, s.getVA());
+}
+
+void SystemZ::writePltHeader(uint8_t *buf) const {
+  const uint8_t pltData[] = {
+      0xe3, 0x10, 0xf0, 0x38, 0x00, 0x24, // stg     %r1,56(%r15)
+      0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl    %r1,_GLOBAL_OFFSET_TABLE_
+      0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc     48(8,%r15),8(%r1)
+      0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg      %r1,16(%r1)
+      0x07, 0xf1,                         // br      %r1
+      0x07, 0x00,                         // nopr
+      0x07, 0x00,                         // nopr
+      0x07, 0x00,                         // nopr
+  };
+  memcpy(buf, pltData, sizeof(pltData));
+  uint64_t got = in.got->getVA();
+  uint64_t plt = in.plt->getVA();
+  write32be(buf + 8, (got - plt - 6) >> 1);
+}
+
+void SystemZ::addPltHeaderSymbols(InputSection &isec) const {
+  // The PLT header needs a reference to _GLOBAL_OFFSET_TABLE_, so we
+  // must ensure the .got section is created even if otherwise unused.
+  in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
+}
+
+void SystemZ::writePlt(uint8_t *buf, const Symbol &sym,
+                       uint64_t pltEntryAddr) const {
+  const uint8_t inst[] = {
+      0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, // larl    %r1,<.got.plt slot>
+      0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg      %r1,0(%r1)
+      0x07, 0xf1,                         // br      %r1
+      0x0d, 0x10,                         // basr    %r1,%r0
+      0xe3, 0x10, 0x10, 0x0c, 0x00, 0x14, // lgf     %r1,12(%r1)
+      0xc0, 0xf4, 0x00, 0x00, 0x00, 0x00, // jg      <plt header>
+      0x00, 0x00, 0x00, 0x00,             // <relocation offset>
+  };
+  memcpy(buf, inst, sizeof(inst));
+
+  write32be(buf + 2, (sym.getGotPltVA() - pltEntryAddr) >> 1);
+  write32be(buf + 24, (in.plt->getVA() - pltEntryAddr - 22) >> 1);
+  write32be(buf + 28, in.relaPlt->entsize * sym.getPltIdx());
+}
+
+int64_t SystemZ::getImplicitAddend(const uint8_t *buf, RelType type) const {
+  switch (type) {
+  case R_390_8:
+    return SignExtend64<8>(*buf);
+  case R_390_16:
+  case R_390_PC16:
+    return SignExtend64<16>(read16be(buf));
+  case R_390_PC16DBL:
+    return SignExtend64<16>(read16be(buf)) << 1;
+  case R_390_32:
+  case R_390_PC32:
+    return SignExtend64<32>(read32be(buf));
+  case R_390_PC32DBL:
+    return SignExtend64<32>(read32be(buf)) << 1;
+  case R_390_64:
+  case R_390_PC64:
+  case R_390_TLS_DTPMOD:
+  case R_390_TLS_DTPOFF:
+  case R_390_TLS_TPOFF:
+  case R_390_GLOB_DAT:
+  case R_390_RELATIVE:
+  case R_390_IRELATIVE:
+    return read64be(buf);
+  case R_390_COPY:
+  case R_390_JMP_SLOT:
+  case R_390_NONE:
+    // These relocations are defined as not having an implicit addend.
+    return 0;
+  default:
+    internalLinkerError(getErrorLocation(buf),
+                        "cannot read addend for relocation " + toString(type));
+    return 0;
+  }
+}
+
+RelType SystemZ::getDynRel(RelType type) const {
+  if (type == R_390_64 || type == R_390_PC64)
+    return type;
+  return R_390_NONE;
+}
+
+RelExpr SystemZ::adjustTlsExpr(RelType type, RelExpr expr) const {
+  if (expr == R_RELAX_TLS_GD_TO_IE)
+    return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
+  return expr;
+}
+
+int SystemZ::getTlsGdRelaxSkip(RelType type) const {
+  // A __tls_get_offset call instruction is marked with 2 relocations:
+  //
+  //   R_390_TLS_GDCALL / R_390_TLS_LDCALL: marker relocation
+  //   R_390_PLT32DBL: __tls_get_offset
+  //
+  // After the relaxation we no longer call __tls_get_offset and should skip
+  // both relocations to not create a false dependence on __tls_get_offset
+  // being defined.
+  //
+  // Note that this mechanism only works correctly if the R_390_TLS_[GL]DCALL
+  // is seen immediately *before* the R_390_PLT32DBL.  Unfortunately, current
+  // compilers on the platform will typically generate the inverse sequence.
+  // To fix this, we sort relocations by offset in RelocationScanner::scan;
+  // this ensures the correct sequence as the R_390_TLS_[GL]DCALL applies to
+  // the first byte of the brasl instruction, while the R_390_PLT32DBL applies
+  // to its third byte (the relative displacement).
+
+  if (type == R_390_TLS_GDCALL || type == R_390_TLS_LDCALL)
+    return 2;
+  return 1;
+}
+
+void SystemZ::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+                             uint64_t val) const {
+  // The general-dynamic code sequence for a global `x`:
+  //
+  // Instruction                      Relocation       Symbol
+  // ear %rX,%a0
+  // sllg %rX,%rX,32
+  // ear %rX,%a1
+  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
+  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
+  // brasl %r14,__tls_get_offset@plt  R_390_TLS_GDCALL x
+  //            :tls_gdcall:x         R_390_PLT32DBL   __tls_get_offset
+  // la %r2,0(%r2,%rX)
+  //
+  // .LC0:
+  // .quad   x@TLSGD                  R_390_TLS_GD64   x
+  //
+  // Relaxing to initial-exec entails:
+  // 1) Replacing the call by a load from the GOT.
+  // 2) Replacing the relocation on the constant LC0 by R_390_TLS_GOTIE64.
+
+  switch (rel.type) {
+  case R_390_TLS_GDCALL:
+    // brasl %r14,__tls_get_offset@plt -> lg %r2,0(%r2,%r12)
+    write16be(loc, 0xe322);
+    write32be(loc + 2, 0xc0000004);
+    break;
+  case R_390_TLS_GD64:
+    relocateNoSym(loc, R_390_TLS_GOTIE64, val);
+    break;
+  default:
+    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
+  }
+}
+
+void SystemZ::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+                             uint64_t val) const {
+  // The general-dynamic code sequence for a global `x`:
+  //
+  // Instruction                      Relocation       Symbol
+  // ear %rX,%a0
+  // sllg %rX,%rX,32
+  // ear %rX,%a1
+  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
+  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
+  // brasl %r14,__tls_get_offset@plt  R_390_TLS_GDCALL x
+  //            :tls_gdcall:x         R_390_PLT32DBL   __tls_get_offset
+  // la %r2,0(%r2,%rX)
+  //
+  // .LC0:
+  // .quad   x@tlsgd                  R_390_TLS_GD64   x
+  //
+  // Relaxing to local-exec entails:
+  // 1) Replacing the call by a nop.
+  // 2) Replacing the relocation on the constant LC0 by R_390_TLS_LE64.
+
+  switch (rel.type) {
+  case R_390_TLS_GDCALL:
+    // brasl %r14,__tls_get_offset@plt -> brcl 0,.
+    write16be(loc, 0xc004);
+    write32be(loc + 2, 0x00000000);
+    break;
+  case R_390_TLS_GD64:
+    relocateNoSym(loc, R_390_TLS_LE64, val);
+    break;
+  default:
+    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
+  }
+}
+
+void SystemZ::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
+                             uint64_t val) const {
+  // The local-dynamic code sequence for a global `x`:
+  //
+  // Instruction                      Relocation       Symbol
+  // ear %rX,%a0
+  // sllg %rX,%rX,32
+  // ear %rX,%a1
+  // larl %r12,_GLOBAL_OFFSET_TABLE_  R_390_GOTPCDBL   _GLOBAL_OFFSET_TABLE_
+  // lgrl %r2,.LC0                    R_390_PC32DBL    .LC0
+  // brasl %r14,__tls_get_offset@plt  R_390_TLS_LDCALL <sym>
+  //            :tls_ldcall:<sym>     R_390_PLT32DBL   __tls_get_offset
+  // la %r2,0(%r2,%rX)
+  // lgrl %rY,.LC1                    R_390_PC32DBL    .LC1
+  // la %r2,0(%r2,%rY)
+  //
+  // .LC0:
+  // .quad   <sym>@tlsldm             R_390_TLS_LDM64  <sym>
+  // .LC1:
+  // .quad   x@dtpoff                 R_390_TLS_LDO64  x
+  //
+  // Relaxing to local-exec entails:
+  // 1) Replacing the call by a nop.
+  // 2) Replacing the constant LC0 by 0 (i.e. ignoring the relocation).
+  // 3) Replacing the relocation on the constant LC1 by R_390_TLS_LE64.
+
+  switch (rel.type) {
+  case R_390_TLS_LDCALL:
+    // brasl %r14,__tls_get_offset@plt -> brcl 0,.
+    write16be(loc, 0xc004);
+    write32be(loc + 2, 0x00000000);
+    break;
+  case R_390_TLS_LDM64:
+    break;
+  case R_390_TLS_LDO64:
+    relocateNoSym(loc, R_390_TLS_LE64, val);
+    break;
+  default:
+    llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
+  }
+}
+
+RelExpr SystemZ::adjustGotPcExpr(RelType type, int64_t addend,
+                                 const uint8_t *loc) const {
+  // Only R_390_GOTENT with addend 2 can be relaxed.
+  if (!config->relax || addend != 2 || type != R_390_GOTENT)
+    return R_GOT_PC;
+  const uint16_t op = read16be(loc - 2);
+
+  // lgrl rx,sym@GOTENT -> larl rx, sym
+  // This relaxation is legal if "sym" binds locally (which was already
+  // verified by our caller) and is in-range and properly aligned for a
+  // LARL instruction.  We cannot verify the latter constraint here, so
+  // we assume it is true and revert the decision later on in relaxOnce
+  // if necessary.
+  if ((op & 0xff0f) == 0xc408)
+    return R_RELAX_GOT_PC;
+
+  return R_GOT_PC;
+}
+
+bool SystemZ::relaxOnce(int pass) const {
+  // If we decided in adjustGotPcExpr to relax a R_390_GOTENT,
+  // we need to validate the target symbol is in-range and aligned.
+  SmallVector<InputSection *, 0> storage;
+  bool changed = false;
+  for (OutputSection *osec : outputSections) {
+    if (!(osec->flags & SHF_EXECINSTR))
+      continue;
+    for (InputSection *sec : getInputSections(*osec, storage)) {
+      for (Relocation &rel : sec->relocs()) {
+        if (rel.expr != R_RELAX_GOT_PC)
+          continue;
+
+        uint64_t v = sec->getRelocTargetVA(
+            sec->file, rel.type, rel.addend,
+            sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr);
+        if (isInt<33>(v) && !(v & 1))
+          continue;
+        if (rel.sym->auxIdx == 0) {
+          rel.sym->allocateAux();
+          addGotEntry(*rel.sym);
+          changed = true;
+        }
+        rel.expr = R_GOT_PC;
+      }
+    }
+  }
+  return changed;
+}
+
+void SystemZ::relaxGot(uint8_t *loc, const Relocation &rel,
+                       uint64_t val) const {
+  assert(isInt<33>(val) &&
+         "R_390_GOTENT should not have been relaxed if it overflows");
+  assert(!(val & 1) &&
+         "R_390_GOTENT should not have been relaxed if it is misaligned");
+  const uint16_t op = read16be(loc - 2);
+
+  // lgrl rx,sym@GOTENT -> larl rx, sym
+  if ((op & 0xff0f) == 0xc408) {
+    write16be(loc - 2, 0xc000 | (op & 0x00f0));
+    write32be(loc, val >> 1);
+  }
+}
+
+void SystemZ::relocate(uint8_t *loc, const Relocation &rel,
+                       uint64_t val) const {
+  switch (rel.expr) {
+  case R_RELAX_GOT_PC:
+    return relaxGot(loc, rel, val);
+  case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
+    return relaxTlsGdToIe(loc, rel, val);
+  case R_RELAX_TLS_GD_TO_LE:
+    return relaxTlsGdToLe(loc, rel, val);
+  case R_RELAX_TLS_LD_TO_LE:
+    return relaxTlsLdToLe(loc, rel, val);
+  default:
+    break;
+  }
+  switch (rel.type) {
+  case R_390_8:
+    checkIntUInt(loc, val, 8, rel);
+    *loc = val;
+    break;
+  case R_390_12:
+  case R_390_GOT12:
+  case R_390_GOTPLT12:
+  case R_390_TLS_GOTIE12:
+    checkUInt(loc, val, 12, rel);
+    write16be(loc, (read16be(loc) & 0xF000) | val);
+    break;
+  case R_390_PC12DBL:
+  case R_390_PLT12DBL:
+    checkInt(loc, val, 13, rel);
+    checkAlignment(loc, val, 2, rel);
+    write16be(loc, (read16be(loc) & 0xF000) | ((val >> 1) & 0x0FFF));
+    break;
+  case R_390_16:
+  case R_390_GOT16:
+  case R_390_GOTPLT16:
+  case R_390_GOTOFF16:
+  case R_390_PLTOFF16:
+    checkIntUInt(loc, val, 16, rel);
+    write16be(loc, val);
+    break;
+  case R_390_PC16:
+    checkInt(loc, val, 16, rel);
+    write16be(loc, val);
+    break;
+  case R_390_PC16DBL:
+  case R_390_PLT16DBL:
+    checkInt(loc, val, 17, rel);
+    checkAlignment(loc, val, 2, rel);
+    write16be(loc, val >> 1);
+    break;
+  case R_390_20:
+  case R_390_GOT20:
+  case R_390_GOTPLT20:
+  case R_390_TLS_GOTIE20:
+    checkInt(loc, val, 20, rel);
+    write32be(loc, (read32be(loc) & 0xF00000FF) | ((val & 0xFFF) << 16) |
+                       ((val & 0xFF000) >> 4));
+    break;
+  case R_390_PC24DBL:
+  case R_390_PLT24DBL:
+    checkInt(loc, val, 25, rel);
+    checkAlignment(loc, val, 2, rel);
+    loc[0] = val >> 17;
+    loc[1] = val >> 9;
+    loc[2] = val >> 1;
+    break;
+  case R_390_32:
+  case R_390_GOT32:
+  case R_390_GOTPLT32:
+  case R_390_GOTOFF:
+  case R_390_PLTOFF32:
+  case R_390_TLS_IE32:
+  case R_390_TLS_GOTIE32:
+  case R_390_TLS_GD32:
+  case R_390_TLS_LDM32:
+  case R_390_TLS_LDO32:
+  case R_390_TLS_LE32:
+    checkIntUInt(loc, val, 32, rel);
+    write32be(loc, val);
+    break;
+  case R_390_PC32:
+  case R_390_PLT32:
+    checkInt(loc, val, 32, rel);
+    write32be(loc, val);
+    break;
+  case R_390_PC32DBL:
+  case R_390_PLT32DBL:
+  case R_390_GOTPCDBL:
+  case R_390_GOTENT:
+  case R_390_GOTPLTENT:
+  case R_390_TLS_IEENT:
+    checkInt(loc, val, 33, rel);
+    checkAlignment(loc, val, 2, rel);
+    write32be(loc, val >> 1);
+    break;
+  case R_390_64:
+  case R_390_PC64:
+  case R_390_PLT64:
+  case R_390_GOT64:
+  case R_390_GOTPLT64:
+  case R_390_GOTOFF64:
+  case R_390_PLTOFF64:
+  case R_390_GOTPC:
+  case R_390_TLS_IE64:
+  case R_390_TLS_GOTIE64:
+  case R_390_TLS_GD64:
+  case R_390_TLS_LDM64:
+  case R_390_TLS_LDO64:
+  case R_390_TLS_LE64:
+  case R_390_TLS_DTPMOD:
+  case R_390_TLS_DTPOFF:
+  case R_390_TLS_TPOFF:
+    write64be(loc, val);
+    break;
+  case R_390_TLS_LOAD:
+  case R_390_TLS_GDCALL:
+  case R_390_TLS_LDCALL:
+    break;
+  default:
+    llvm_unreachable("unknown relocation");
+  }
+}
+
+TargetInfo *elf::getSystemZTargetInfo() {
+  static SystemZ t;
+  return &t;
+}
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 475f7dea1dd7e9..83d816ddb0601e 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -33,6 +33,7 @@ add_lld_library(lldELF
   Arch/PPC64.cpp
   Arch/RISCV.cpp
   Arch/SPARCV9.cpp
+  Arch/SystemZ.cpp
   Arch/X86.cpp
   Arch/X86_64.cpp
   ARMErrataFix.cpp
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index f4b7d1c9d5b973..8b2c32b1534821 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -200,6 +200,7 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef emul) {
           .Case("msp430elf", {ELF32LEKind, EM_MSP430})
           .Case("elf64_amdgpu", {ELF64LEKind, EM_AMDGPU})
           .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH})
+          .Case("elf64_s390", {ELF64BEKind, EM_S390})
           .Default({ELFNoneKind, EM_NONE});
 
   if (ret.first == ELFNoneKind)
@@ -1137,7 +1138,7 @@ static SmallVector<StringRef, 0> getSymbolOrderingFile(MemoryBufferRef mb) {
 static bool getIsRela(opt::InputArgList &args) {
   // The psABI specifies the default relocation entry format.
   bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH,
-                            EM_PPC, EM_PPC64, EM_RISCV, EM_X86_64},
+                            EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64},
                            config->emachine);
   // If -z rel or -z rela is specified, use the last option.
   for (auto *arg : args.filtered(OPT_z)) {
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a292e873e72f71..6c7ef27cbd4942 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1614,6 +1614,8 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
     return EM_RISCV;
   case Triple::sparcv9:
     return EM_SPARCV9;
+  case Triple::systemz:
+    return EM_S390;
   case Triple::x86:
     return t.isOSIAMCU() ? EM_IAMCU : EM_386;
   case Triple::x86_64:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 0e0b9783bd88a0..e033a715b59214 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -354,9 +354,10 @@ InputSectionBase *InputSection::getRelocatedSection() const {
 
 template <class ELFT, class RelTy>
 void InputSection::copyRelocations(uint8_t *buf) {
-  if (config->relax && !config->relocatable && config->emachine == EM_RISCV) {
-    // On RISC-V, relaxation might change relocations: copy from internal ones
-    // that are updated by relaxation.
+  if (config->relax && !config->relocatable &&
+      (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) {
+    // On LoongArch and RISC-V, relaxation might change relocations: copy
+    // from internal ones that are updated by relaxation.
     InputSectionBase *sec = getRelocatedSection();
     copyRelocations<ELFT, RelTy>(buf, llvm::make_range(sec->relocations.begin(),
                                                        sec->relocations.end()));
@@ -654,6 +655,7 @@ static int64_t getTlsTpOffset(const Symbol &s) {
 
     // Variant 2.
   case EM_HEXAGON:
+  case EM_S390:
   case EM_SPARCV9:
   case EM_386:
   case EM_X86_64:
@@ -716,6 +718,10 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
   case R_GOT_PC:
   case R_RELAX_TLS_GD_TO_IE:
     return sym.getGotVA() + a - p;
+  case R_GOTPLT_GOTREL:
+    return sym.getGotPltVA() + a - in.got->getVA();
+  case R_GOTPLT_PC:
+    return sym.getGotPltVA() + a - p;
   case R_LOONGARCH_GOT_PAGE_PC:
     if (sym.hasFlag(NEEDS_TLSGD))
       return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type);
@@ -807,6 +813,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
     return getLoongArchPageDelta(sym.getPltVA() + a, p, type);
   case R_PLT_GOTPLT:
     return sym.getPltVA() + a - in.gotPlt->getVA();
+  case R_PLT_GOTREL:
+    return sym.getPltVA() + a - in.got->getVA();
   case R_PPC32_PLTREL:
     // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30
     // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index dda4242d8be1c1..243b28d90bb4c1 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -102,7 +102,23 @@ class SectionBase {
         link(link), info(info) {}
 };
 
-struct RISCVRelaxAux;
+struct SymbolAnchor {
+  uint64_t offset;
+  Defined *d;
+  bool end; // true for the anchor of st_value+st_size
+};
+
+struct RelaxAux {
+  // This records symbol start and end offsets which will be adjusted according
+  // to the nearest relocDeltas element.
+  SmallVector<SymbolAnchor, 0> anchors;
+  // For relocations[i], the actual offset is
+  //   r_offset - (i ? relocDeltas[i-1] : 0).
+  std::unique_ptr<uint32_t[]> relocDeltas;
+  // For relocations[i], the actual type is relocTypes[i].
+  std::unique_ptr<RelType[]> relocTypes;
+  SmallVector<uint32_t, 0> writes;
+};
 
 // This corresponds to a section of an input file.
 class InputSectionBase : public SectionBase {
@@ -226,9 +242,9 @@ class InputSectionBase : public SectionBase {
     // basic blocks.
     JumpInstrMod *jumpInstrMod = nullptr;
 
-    // Auxiliary information for RISC-V linker relaxation. RISC-V does not use
-    // jumpInstrMod.
-    RISCVRelaxAux *relaxAux;
+    // Auxiliary information for RISC-V and LoongArch linker relaxation.
+    // They do not use jumpInstrMod.
+    RelaxAux *relaxAux;
 
     // The compressed content size when `compressed` is true.
     size_t compressedSize;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 79c8230724ade7..619fbaf5dc5452 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -203,8 +203,9 @@ static bool isAbsoluteValue(const Symbol &sym) {
 
 // Returns true if Expr refers a PLT entry.
 static bool needsPlt(RelExpr expr) {
-  return oneof<R_PLT, R_PLT_PC, R_PLT_GOTPLT, R_LOONGARCH_PLT_PAGE_PC,
-               R_PPC32_PLTREL, R_PPC64_CALL_PLT>(expr);
+  return oneof<R_PLT, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL,
+               R_GOTPLT_PC, R_LOONGARCH_PLT_PAGE_PC, R_PPC32_PLTREL,
+               R_PPC64_CALL_PLT>(expr);
 }
 
 bool lld::elf::needsGot(RelExpr expr) {
@@ -233,6 +234,8 @@ static RelExpr toPlt(RelExpr expr) {
     return R_PLT_PC;
   case R_ABS:
     return R_PLT;
+  case R_GOTREL:
+    return R_PLT_GOTREL;
   default:
     return expr;
   }
@@ -253,6 +256,8 @@ static RelExpr fromPlt(RelExpr expr) {
     return R_ABS;
   case R_PLT_GOTPLT:
     return R_GOTPLTREL;
+  case R_PLT_GOTREL:
+    return R_GOTREL;
   default:
     return expr;
   }
@@ -935,7 +940,7 @@ void elf::addGotEntry(Symbol &sym) {
 static void addTpOffsetGotEntry(Symbol &sym) {
   in.got->addEntry(sym);
   uint64_t off = sym.getGotOffset();
-  if (!sym.isPreemptible && !config->isPic) {
+  if (!sym.isPreemptible && !config->shared) {
     in.got->addConstant({R_TPREL, target->symbolicRel, off, 0, &sym});
     return;
   }
@@ -979,10 +984,10 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
   if (oneof<R_GOTPLT, R_GOT_OFF, R_RELAX_HINT, R_MIPS_GOT_LOCAL_PAGE,
             R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
-            R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE,
-            R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>(
-          e))
+            R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC,
+            R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD,
+            R_AARCH64_GOT_PAGE, R_LOONGARCH_PLT_PAGE_PC, R_LOONGARCH_GOT,
+            R_LOONGARCH_GOT_PAGE_PC>(e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
@@ -1374,8 +1379,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
             R_LOONGARCH_GOT_PAGE_PC, R_GOT_OFF, R_TLSIE_HINT>(expr)) {
     ctx.hasTlsIe.store(true, std::memory_order_relaxed);
     // Initial-Exec relocs can be optimized to Local-Exec if the symbol is
-    // locally defined.
-    if (execOptimize && isLocalInExecutable) {
+    // locally defined.  This is not supported on SystemZ.
+    if (execOptimize && isLocalInExecutable && config->emachine != EM_S390) {
       c.addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
     } else if (expr != R_TLSIE_HINT) {
       sym.setFlags(NEEDS_TLSIE);
@@ -1534,8 +1539,10 @@ void RelocationScanner::scan(ArrayRef<RelTy> rels) {
   // For EhInputSection, OffsetGetter expects the relocations to be sorted by
   // r_offset. In rare cases (.eh_frame pieces are reordered by a linker
   // script), the relocations may be unordered.
+  // On SystemZ, all sections need to be sorted by r_offset, to allow TLS
+  // relaxation to be handled correctly - see SystemZ::getTlsGdRelaxSkip.
   SmallVector<RelTy, 0> storage;
-  if (isa<EhInputSection>(sec))
+  if (isa<EhInputSection>(sec) || config->emachine == EM_S390)
     rels = sortRels(rels, storage);
 
   end = static_cast<const void *>(rels.end());
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index cfb9092149f3e0..7eb8a811e6934f 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -40,11 +40,14 @@ enum RelExpr {
   R_GOTPLT,
   R_GOTPLTREL,
   R_GOTREL,
+  R_GOTPLT_GOTREL,
+  R_GOTPLT_PC,
   R_NONE,
   R_PC,
   R_PLT,
   R_PLT_PC,
   R_PLT_GOTPLT,
+  R_PLT_GOTREL,
   R_RELAX_HINT,
   R_RELAX_GOT_PC,
   R_RELAX_GOT_PC_NOPIC,
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index dd69916d6b05e8..f0ede1f43bbdb3 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -445,6 +445,7 @@ static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) {
       .Case("elf32-msp430", {ELF32LEKind, EM_MSP430})
       .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH})
       .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH})
+      .Case("elf64-s390", {ELF64BEKind, EM_S390})
       .Default({ELFNoneKind, EM_NONE});
 }
 
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 4b413163314b2e..bada394aa30d7d 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1419,6 +1419,9 @@ DynamicSection<ELFT>::computeContents() {
     case EM_MIPS:
       addInSec(DT_MIPS_PLTGOT, *in.gotPlt);
       break;
+    case EM_S390:
+      addInSec(DT_PLTGOT, *in.got);
+      break;
     case EM_SPARCV9:
       addInSec(DT_PLTGOT, *in.plt);
       break;
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index 671d22cc66a0e9..b7922425a34e43 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -87,6 +87,8 @@ TargetInfo *elf::getTarget() {
     return getRISCVTargetInfo();
   case EM_SPARCV9:
     return getSPARCV9TargetInfo();
+  case EM_S390:
+    return getSystemZTargetInfo();
   case EM_X86_64:
     return getX86_64TargetInfo();
   }
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index ab6b6b9c013ba3..0cefa318135662 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -95,6 +95,8 @@ class TargetInfo {
 
   // Do a linker relaxation pass and return true if we changed something.
   virtual bool relaxOnce(int pass) const { return false; }
+  // Do finalize relaxation after collecting relaxation infos.
+  virtual void finalizeRelax(int passes) const {}
 
   virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                                  JumpModType val) const {}
@@ -186,6 +188,7 @@ TargetInfo *getPPC64TargetInfo();
 TargetInfo *getPPCTargetInfo();
 TargetInfo *getRISCVTargetInfo();
 TargetInfo *getSPARCV9TargetInfo();
+TargetInfo *getSystemZTargetInfo();
 TargetInfo *getX86TargetInfo();
 TargetInfo *getX86_64TargetInfo();
 template <class ELFT> TargetInfo *getMipsTargetInfo();
@@ -236,6 +239,7 @@ void addArmSyntheticSectionMappingSymbol(Defined *);
 void sortArmMappingSymbols();
 void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf);
 void createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files);
+void initSymbolAnchors();
 
 LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target;
 TargetInfo *getTarget();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 501c10f358497b..6df43a34be013a 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1752,8 +1752,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
       }
     }
   }
-  if (!config->relocatable && config->emachine == EM_RISCV)
-    riscvFinalizeRelax(pass);
+  if (!config->relocatable)
+    target->finalizeRelax(pass);
 
   if (config->relocatable)
     for (OutputSection *sec : outputSections)
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 4752d92e3b1d71..7b16764dd2c7ce 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -448,6 +448,10 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     add("-lto-cs-profile-generate");
   if (auto *arg = args.getLastArg(OPT_lto_cs_profile_file))
     add("-lto-cs-profile-file:" + StringRef(arg->getValue()));
+  if (args.hasArg(OPT_plugin_opt_emit_llvm))
+    add("-lldemit:llvm");
+  if (args.hasArg(OPT_lto_emit_asm))
+    add("-lldemit:asm");
 
   if (auto *a = args.getLastArg(OPT_thinlto_cache_dir))
     add("-lldltocache:" + StringRef(a->getValue()));
diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td
index 02f00f27406c08..9a0a96aac7f1c6 100644
--- a/lld/MinGW/Options.td
+++ b/lld/MinGW/Options.td
@@ -158,6 +158,8 @@ def lto_cs_profile_generate: FF<"lto-cs-profile-generate">,
   HelpText<"Perform context sensitive PGO instrumentation">;
 def lto_cs_profile_file: JJ<"lto-cs-profile-file=">,
   HelpText<"Context sensitive profile file path">;
+def lto_emit_asm: FF<"lto-emit-asm">,
+  HelpText<"Emit assembly code">;
 
 def thinlto_cache_dir: JJ<"thinlto-cache-dir=">,
   HelpText<"Path to ThinLTO cached object file directory">;
@@ -181,6 +183,9 @@ def: J<"plugin-opt=cs-profile-path=">,
   Alias<lto_cs_profile_file>, HelpText<"Alias for --lto-cs-profile-file">;
 def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">,
   HelpText<"Directory to store .dwo files when LTO and debug fission are used">;
+def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">,
+  Alias<lto_emit_asm>, HelpText<"Alias for --lto-emit-asm">;
+def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">;
 def: J<"plugin-opt=jobs=">, Alias<thinlto_jobs_eq>, HelpText<"Alias for --thinlto-jobs=">;
 def plugin_opt_mcpu_eq: J<"plugin-opt=mcpu=">;
 
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index fa0e7f2bc0b3ea..56ba3463aeadc0 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -82,9 +82,46 @@ COFF Improvements
 
 * Added support for ``--time-trace`` and associated ``--time-trace-granularity``.
   This generates a .json profile trace of the linker execution.
+  (`#68236 <https://github.com/llvm/llvm-project/pull/68236>`_)
+
+* The ``-dependentloadflag`` option was implemented.
+  (`#71537 <https://github.com/llvm/llvm-project/pull/71537>`_)
 
 * LLD now prefers library paths specified with ``-libpath:`` over the implicitly
   detected toolchain paths.
+  (`#78039 <https://github.com/llvm/llvm-project/pull/78039>`_)
+
+* Added new options ``-lldemit:llvm`` and ``-lldemit:asm`` for getting
+  the output of LTO compilation as LLVM bitcode or assembly.
+  (`#66964 <https://github.com/llvm/llvm-project/pull/66964>`_)
+  (`#67079 <https://github.com/llvm/llvm-project/pull/67079>`_)
+
+* Added a new option ``-build-id`` for generating a ``.buildid`` section
+  when not generating a PDB. A new symbol ``__buildid`` is generated by
+  the linker, allowing code to reference the build ID of the binary.
+  (`#71433 <https://github.com/llvm/llvm-project/pull/71433>`_)
+  (`#74652 <https://github.com/llvm/llvm-project/pull/74652>`_)
+
+* A new, LLD specific option, ``-lld-allow-duplicate-weak``, was added
+  for allowing duplicate weak symbols.
+  (`#68077 <https://github.com/llvm/llvm-project/pull/68077>`_)
+
+* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport
+  redirections.
+  (`#70777 <https://github.com/llvm/llvm-project/pull/70777>`_)
+  (`#71376 <https://github.com/llvm/llvm-project/pull/71376>`_)
+  (`#72989 <https://github.com/llvm/llvm-project/pull/72989>`_)
+
+* Linking undefined references to weak symbols with LTO now works.
+  (`#70430 <https://github.com/llvm/llvm-project/pull/70430>`_)
+
+* Use the ``SOURCE_DATE_EPOCH`` environment variable for the PE header and
+  debug directory timestamps, if neither the ``/Brepro`` nor ``/timestamp:``
+  options have been specified. This makes the linker output reproducible by
+  setting this environment variable.
+  (`#81326 <https://github.com/llvm/llvm-project/pull/81326>`_)
+
+* Lots of incremental work towards supporting linking ARM64EC binaries.
 
 MinGW Improvements
 ------------------
@@ -92,19 +129,29 @@ MinGW Improvements
 * Added support for many LTO and ThinLTO options (most LTO options supported
   by the ELF driver, that are implemented by the COFF backend as well,
   should be supported now).
+  (`D158412 <https://reviews.llvm.org/D158412>`_)
+  (`D158887 <https://reviews.llvm.org/D158887>`_)
+  (`#77387 <https://github.com/llvm/llvm-project/pull/77387>`_)
+  (`#81475 <https://github.com/llvm/llvm-project/pull/81475>`_)
 
 * LLD no longer tries to autodetect and use library paths from MSVC/WinSDK
   installations when run in MinGW mode; that mode of operation shouldn't
   ever be needed in MinGW mode, and could be a source of unexpected
   behaviours.
+  (`D144084 <https://reviews.llvm.org/D144084>`_)
 
 * The ``--icf=safe`` option now works as expected; it was previously a no-op.
-
-* More correctly handle LTO of files that define ``__imp_`` prefixed dllimport
-  redirections.
+  (`#70037 <https://github.com/llvm/llvm-project/pull/70037>`_)
 
 * The strip flags ``-S`` and ``-s`` now can be used to strip out DWARF debug
   info and symbol tables while emitting a PDB debug info file.
+  (`#75181 <https://github.com/llvm/llvm-project/pull/75181>`_)
+
+* The option ``--dll`` is handled as an alias for the ``--shared`` option.
+  (`#68575 <https://github.com/llvm/llvm-project/pull/68575>`_)
+
+* The option ``--sort-common`` is ignored now.
+  (`#66336 <https://github.com/llvm/llvm-project/pull/66336>`_)
 
 MachO Improvements
 ------------------
diff --git a/lld/test/COFF/lto-cache-errors.ll b/lld/test/COFF/lto-cache-errors.ll
index 55244e5690dc34..a46190a81b6230 100644
--- a/lld/test/COFF/lto-cache-errors.ll
+++ b/lld/test/COFF/lto-cache-errors.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 ;; Not supported on windows since we use permissions to deny the creation
 ; UNSUPPORTED: system-windows
 
diff --git a/lld/test/COFF/thinlto-emit-imports.ll b/lld/test/COFF/thinlto-emit-imports.ll
index a9f22c1dc2dcff..b47a6cea4eb7df 100644
--- a/lld/test/COFF/thinlto-emit-imports.ll
+++ b/lld/test/COFF/thinlto-emit-imports.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 
 ; Generate summary sections and test lld handling.
 ; RUN: opt -module-summary %s -o %t1.obj
diff --git a/lld/test/COFF/timestamp.test b/lld/test/COFF/timestamp.test
index c0658d6109811b..cc73af13c38ca6 100644
--- a/lld/test/COFF/timestamp.test
+++ b/lld/test/COFF/timestamp.test
@@ -4,19 +4,28 @@ RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.1.exe
 RUN: lld-link %t.obj /debug /Brepro /entry:main /nodefaultlib /out:%t.2.exe
 RUN: lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.3.exe
 RUN: env SOURCE_DATE_EPOCH=0 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.4.exe
-RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.5.exe
-RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe
+# Test timestamps corresponding to INT32_TMAX
+RUN: lld-link %t.obj /debug /timestamp:2147483647 /entry:main /nodefaultlib /out:%t.5.exe
+RUN: env SOURCE_DATE_EPOCH=2147483647 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.6.exe
+# Test that the command line option /timestamp has precedence over SOURCE_DATE_EPOCH
 RUN: env SOURCE_DATE_EPOCH=12345 lld-link %t.obj /debug /timestamp:0 /entry:main /nodefaultlib /out:%t.7.exe
-RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.8.exe 2>&1 | FileCheck %s --check-prefix=ERROR
-RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe 2>&1 | FileCheck %s --check-prefix=ERROR2
+# Test timestamps corresponding to UINT32_TMAX
+RUN: lld-link %t.obj /debug /timestamp:4294967295 /entry:main /nodefaultlib /out:%t.8.exe
+RUN: env SOURCE_DATE_EPOCH=4294967295 lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.9.exe
+# Test that setting UINT32_MAX+1 as timestamp fails.
+RUN: env LLD_IN_TEST=1 not lld-link %t.obj /debug /timestamp:4294967296 /entry:main /nodefaultlib /out:%t.10.exe 2>&1 | FileCheck %s --check-prefix=ERROR
+RUN: env SOURCE_DATE_EPOCH=4294967296 env LLD_IN_TEST=1 not lld-link %t.obj /debug /entry:main /nodefaultlib /out:%t.11.exe 2>&1 | FileCheck %s --check-prefix=ERROR2
 RUN: llvm-readobj --file-headers --coff-debug-directory %t.1.exe | FileCheck %s --check-prefix=HASH
 RUN: llvm-readobj --file-headers --coff-debug-directory %t.2.exe | FileCheck %s --check-prefix=HASH
 RUN: llvm-readobj --file-headers --coff-debug-directory %t.3.exe | FileCheck %s --check-prefix=ZERO
 RUN: llvm-readobj --file-headers --coff-debug-directory %t.4.exe | FileCheck %s --check-prefix=ZERO
-RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=MAX
-RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=MAX
+RUN: llvm-readobj --file-headers --coff-debug-directory %t.5.exe | FileCheck %s --check-prefix=LARGE
+RUN: llvm-readobj --file-headers --coff-debug-directory %t.6.exe | FileCheck %s --check-prefix=LARGE
 RUN: llvm-readobj --file-headers --coff-debug-directory %t.7.exe | FileCheck %s --check-prefix=ZERO
 
+# Not inspecting %t.8.exe and %t.9.exe; llvm-readobj with a 32 bit time_t fails to print dates
+# past INT32_MAX correctly.
+
 HASH: ImageFileHeader {
 HASH: TimeDateStamp: [[STAMP:.*]]
 HASH: DebugDirectory [
@@ -27,10 +36,10 @@ ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0)
 ZERO: DebugDirectory [
 ZERO: TimeDateStamp: 1970-01-01 00:00:00 (0x0)
 
-MAX: ImageFileHeader {
-MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF)
-MAX: DebugDirectory [
-MAX: TimeDateStamp: 2106-02-07 06:28:15 (0xFFFFFFFF)
+LARGE: ImageFileHeader {
+LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF)
+LARGE: DebugDirectory [
+LARGE: TimeDateStamp: 2038-01-19 03:14:07 (0x7FFFFFFF)
 
 ERROR: error: invalid timestamp: 4294967296.  Expected 32-bit integer
 ERROR2: error: invalid SOURCE_DATE_EPOCH timestamp: 4294967296.  Expected 32-bit integer
diff --git a/lld/test/ELF/Inputs/systemz-init.s b/lld/test/ELF/Inputs/systemz-init.s
new file mode 100644
index 00000000000000..1611b69b4419e3
--- /dev/null
+++ b/lld/test/ELF/Inputs/systemz-init.s
@@ -0,0 +1,5 @@
+// glibc < 2.39 used to align .init and .fini code at a 4-byte boundary.
+// This file aims to recreate that behavior.
+	.section        .init,"ax",@progbits
+	.align	4
+	lg %r4, 272(%r15)
diff --git a/lld/test/ELF/basic-systemz.s b/lld/test/ELF/basic-systemz.s
new file mode 100644
index 00000000000000..f7bb0e8cbd020d
--- /dev/null
+++ b/lld/test/ELF/basic-systemz.s
@@ -0,0 +1,63 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld --hash-style=sysv -discard-all -shared %t.o -o %t.so
+# RUN: llvm-readelf --file-header --program-headers --section-headers --dynamic-table %t.so | FileCheck %s
+
+# Exits with return code 55 on linux.
+.text
+  lghi 2,55
+  svc 1
+
+# CHECK:       ELF Header:
+# CHECK-NEXT:  Magic:   7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00
+# CHECK-NEXT:  Class:                             ELF64
+# CHECK-NEXT:  Data:                              2's complement, big endian
+# CHECK-NEXT:  Version:                           1 (current)
+# CHECK-NEXT:  OS/ABI:                            UNIX - System V
+# CHECK-NEXT:  ABI Version:                       0
+# CHECK-NEXT:  Type:                              DYN (Shared object file)
+# CHECK-NEXT:  Machine:                           IBM S/390
+# CHECK-NEXT:  Version:                           0x1
+# CHECK-NEXT:  Entry point address:               0x0
+# CHECK-NEXT:  Start of program headers:          64 (bytes into file)
+# CHECK-NEXT:  Start of section headers:          768 (bytes into file)
+# CHECK-NEXT:  Flags:                             0x0
+# CHECK-NEXT:  Size of this header:               64 (bytes)
+# CHECK-NEXT:  Size of program headers:           56 (bytes)
+# CHECK-NEXT:  Number of program headers:         7
+# CHECK-NEXT:  Size of section headers:           64 (bytes)
+# CHECK-NEXT:  Number of section headers:         11
+# CHECK-NEXT:  Section header string table index: 9
+
+# CHECK:       Section Headers:
+# CHECK-NEXT:  [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK-NEXT:  [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
+# CHECK-NEXT:  [ 1] .dynsym           DYNSYM          00000000000001c8 0001c8 000018 18   A  3   1  8
+# CHECK-NEXT:  [ 2] .hash             HASH            00000000000001e0 0001e0 000010 04   A  1   0  4
+# CHECK-NEXT:  [ 3] .dynstr           STRTAB          00000000000001f0 0001f0 000001 00   A  0   0  1
+# CHECK-NEXT:  [ 4] .text             PROGBITS        00000000000011f4 0001f4 000006 00  AX  0   0  4
+# CHECK-NEXT:  [ 5] .dynamic          DYNAMIC         0000000000002200 000200 000060 10  WA  3   0  8
+# CHECK-NEXT:  [ 6] .relro_padding    NOBITS          0000000000002260 000260 000da0 00  WA  0   0  1
+# CHECK-NEXT:  [ 7] .comment          PROGBITS        0000000000000000 000260 000008 01  MS  0   0  1
+# CHECK-NEXT:  [ 8] .symtab           SYMTAB          0000000000000000 000268 000030 18     10   2  8
+# CHECK-NEXT:  [ 9] .shstrtab         STRTAB          0000000000000000 000298 000058 00      0   0  1
+# CHECK-NEXT:  [10] .strtab           STRTAB          0000000000000000 0002f0 00000a 00      0   0  1
+
+# CHECK:       Program Headers:
+# CHECK-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+# CHECK-NEXT:  PHDR           0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R   0x8
+# CHECK-NEXT:  LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x0001f1 0x0001f1 R   0x1000
+# CHECK-NEXT:  LOAD           0x0001f4 0x00000000000011f4 0x00000000000011f4 0x000006 0x000006 R E 0x1000
+# CHECK-NEXT:  LOAD           0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 RW  0x1000
+# CHECK-NEXT:  DYNAMIC        0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000060 RW  0x8
+# CHECK-NEXT:  GNU_RELRO      0x000200 0x0000000000002200 0x0000000000002200 0x000060 0x000e00 R   0x1
+# CHECK-NEXT:  GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x0
+
+# CHECK:       Dynamic section at offset 0x200 contains 6 entries:
+# CHECK-NEXT:  Tag                Type     Name/Value
+# CHECK-NEXT:  0x0000000000000006 (SYMTAB) 0x1c8
+# CHECK-NEXT:  0x000000000000000b (SYMENT) 24 (bytes)
+# CHECK-NEXT:  0x0000000000000005 (STRTAB) 0x1f0
+# CHECK-NEXT:  0x000000000000000a (STRSZ)  1 (bytes)
+# CHECK-NEXT:  0x0000000000000004 (HASH)   0x1e0
+# CHECK-NEXT:  0x0000000000000000 (NULL)   0x0
diff --git a/lld/test/ELF/emulation-systemz.s b/lld/test/ELF/emulation-systemz.s
new file mode 100644
index 00000000000000..dfdb4620954c8a
--- /dev/null
+++ b/lld/test/ELF/emulation-systemz.s
@@ -0,0 +1,29 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld -m elf64_s390 %t.o -o %t1
+# RUN: llvm-readelf --file-header %t1 | FileCheck %s
+# RUN: ld.lld %t.o -o %t2
+# RUN: llvm-readelf --file-header %t2 | FileCheck %s
+# RUN: echo 'OUTPUT_FORMAT(elf64-s390)' > %t.script
+# RUN: ld.lld %t.script %t.o -o %t3
+# RUN: llvm-readelf --file-header %t3 | FileCheck %s
+
+# CHECK:       ELF Header:
+# CHECK-NEXT:  Magic:   7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00
+# CHECK-NEXT:  Class:                             ELF64
+# CHECK-NEXT:  Data:                              2's complement, big endian
+# CHECK-NEXT:  Version:                           1 (current)
+# CHECK-NEXT:  OS/ABI:                            UNIX - System V
+# CHECK-NEXT:  ABI Version:                       0
+# CHECK-NEXT:  Type:                              EXEC (Executable file)
+# CHECK-NEXT:  Machine:                           IBM S/390
+# CHECK-NEXT:  Version:                           0x1
+# CHECK-NEXT:  Entry point address:
+# CHECK-NEXT:  Start of program headers:          64 (bytes into file)
+# CHECK-NEXT:  Start of section headers:
+# CHECK-NEXT:  Flags:                             0x0
+# CHECK-NEXT:  Size of this header:               64 (bytes)
+# CHECK-NEXT:  Size of program headers:           56 (bytes)
+
+.globl _start
+_start:
diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s
new file mode 100644
index 00000000000000..ab61e15d5caca2
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-align.s
@@ -0,0 +1,126 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s
+
+## Test the R_LARCH_ALIGN without symbol index.
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s
+
+## -r keeps section contents unchanged.
+# RUN: ld.lld -r %t.64.o -o %t.64.r
+# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR
+
+# CHECK-DAG: {{0*}}10000 l .text  {{0*}}44 .Ltext_start
+# CHECK-DAG: {{0*}}10038 l .text  {{0*}}0c .L1
+# CHECK-DAG: {{0*}}10040 l .text  {{0*}}04 .L2
+# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# CHECK:      <.Ltext_start>:
+# CHECK-NEXT:   break 1
+# CHECK-NEXT:   break 2
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   break 3
+# CHECK-NEXT:   break 4
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   pcalau12i     $a0, 0
+# CHECK-NEXT:   addi.{{[dw]}} $a0, $a0, 0
+# CHECK-NEXT:   pcalau12i     $a0, 0
+# CHECK-NEXT:   addi.{{[dw]}} $a0, $a0, 56
+# CHECK-NEXT:   pcalau12i     $a0, 0
+# CHECK-NEXT:   addi.{{[dw]}} $a0, $a0, 64
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L1>:
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L2>:
+# CHECK-NEXT:   break 5
+
+# CHECK:      <.Ltext2_start>:
+# CHECK-NEXT:   pcalau12i     $a0, 0
+# CHECK-NEXT:   addi.{{[dw]}} $a0, $a0, 0
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   break 6
+
+# CHECKR:      <.Ltext2_start>:
+# CHECKR-NEXT:   pcalau12i $a0, 0
+# CHECKR-NEXT:   {{0*}}00: R_LARCH_PCALA_HI20 .Ltext2_start
+# CHECKR-NEXT:   {{0*}}00: R_LARCH_RELAX      *ABS*
+# CHECKR-NEXT:   addi.d    $a0, $a0, 0
+# CHECKR-NEXT:   {{0*}}04: R_LARCH_PCALA_LO12 .Ltext2_start
+# CHECKR-NEXT:   {{0*}}04: R_LARCH_RELAX      *ABS*
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:   {{0*}}08: R_LARCH_ALIGN      .Lalign_symbol+0x4
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:   break 6
+
+.macro .fake_p2align_4 max=0
+  .ifdef old
+    .if \max==0
+      .reloc ., R_LARCH_ALIGN, 0xc
+      nop; nop; nop
+    .endif
+  .else
+    .reloc ., R_LARCH_ALIGN, .Lalign_symbol + 0x4 + (\max << 8)
+    nop; nop; nop
+  .endif
+.endm
+
+  .text
+.Lalign_symbol:
+.Ltext_start:
+  break 1
+  break 2
+## +0x8: Emit 2 nops, delete 1 nop.
+  .fake_p2align_4
+
+  break 3
+## +0x14: Emit 3 nops > 8 bytes, not emit.
+  .fake_p2align_4 8
+
+  break 4
+  .fake_p2align_4 8
+## +0x18: Emit 2 nops <= 8 bytes.
+
+## Compensate
+.ifdef old
+  nop; nop
+.endif
+
+## +0x20: Test symbol value and symbol size can be handled.
+  la.pcrel $a0, .Ltext_start
+  la.pcrel $a0, .L1
+  la.pcrel $a0, .L2
+
+## +0x38: Emit 2 nops, delete 1 nop.
+.L1:
+  .fake_p2align_4
+.L2:
+  break 5
+  .size .L1, . - .L1
+  .size .L2, . - .L2
+  .size .Ltext_start, . - .Ltext_start
+
+## Test another text section.
+  .section .text2,"ax",@progbits
+.Ltext2_start:
+  la.pcrel $a0, .Ltext2_start
+  .fake_p2align_4
+  break 6
+  .size .Ltext2_start, . - .Ltext2_start
diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s
new file mode 100644
index 00000000000000..581fce8c95caa4
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-emit-relocs.s
@@ -0,0 +1,49 @@
+# REQUIRES: loongarch
+## Test that we can handle --emit-relocs while relaxing.
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
+# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32
+# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64
+# RUN: llvm-objdump -dr %t.32 | FileCheck %s
+# RUN: llvm-objdump -dr %t.64 | FileCheck %s
+
+## -r should keep original relocations.
+# RUN: ld.lld -r %t.64.o -o %t.64.r
+# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR
+
+## --no-relax should keep original relocations.
+## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now.
+# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax
+# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s
+
+# CHECK:      00010000 <_start>:
+# CHECK-NEXT:   pcalau12i $a0, 0
+# CHECK-NEXT:     R_LARCH_PCALA_HI20 _start
+# CHECK-NEXT:     R_LARCH_RELAX *ABS*
+# CHECK-NEXT:   addi.{{[dw]}} $a0, $a0, 0
+# CHECK-NEXT:     R_LARCH_PCALA_LO12 _start
+# CHECK-NEXT:     R_LARCH_RELAX *ABS*
+# CHECK-NEXT:   nop
+# CHECK-NEXT:     R_LARCH_ALIGN .Lla-relax-align0+0x4
+# CHECK-NEXT:   nop
+# CHECK-NEXT:   ret
+
+# CHECKR:      <_start>:
+# CHECKR-NEXT:   pcalau12i $a0, 0
+# CHECKR-NEXT:     R_LARCH_PCALA_HI20 _start
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   addi.d $a0, $a0, 0
+# CHECKR-NEXT:     R_LARCH_PCALA_LO12 _start
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:     R_LARCH_ALIGN .Lla-relax-align0+0x4
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:   nop
+# CHECKR-NEXT:   ret
+
+.global _start
+_start:
+  la.pcrel $a0, _start
+  .p2align 4
+  ret
diff --git a/lld/test/ELF/lto/resolution-err.ll b/lld/test/ELF/lto/resolution-err.ll
index 6dfa64b1b8b9ee..f9855abaff3279 100644
--- a/lld/test/ELF/lto/resolution-err.ll
+++ b/lld/test/ELF/lto/resolution-err.ll
@@ -1,5 +1,5 @@
 ; UNSUPPORTED: system-windows
-; REQUIRES: shell
+; REQUIRES: shell, non-root-user
 ; RUN: llvm-as %s -o %t.bc
 ; RUN: touch %t.resolution.txt
 ; RUN: chmod u-w %t.resolution.txt
diff --git a/lld/test/ELF/lto/systemz.ll b/lld/test/ELF/lto/systemz.ll
new file mode 100644
index 00000000000000..42bf4e32fb6d75
--- /dev/null
+++ b/lld/test/ELF/lto/systemz.ll
@@ -0,0 +1,18 @@
+; REQUIRES: systemz
+;; Test we can infer the e_machine value EM_S390 from a bitcode file.
+
+; RUN: llvm-as %s -o %t.o
+; RUN: ld.lld %t.o -o %t
+; RUN: llvm-readobj -h %t | FileCheck %s
+
+; CHECK: Class: 64-bit
+; CHECK: DataEncoding: BigEndian
+; CHECK: Machine: EM_S390
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+define void @_start() {
+entry:
+  ret void
+}
diff --git a/lld/test/ELF/lto/thinlto-cant-write-index.ll b/lld/test/ELF/lto/thinlto-cant-write-index.ll
index e664acbb17de1a..286fcddd4238a1 100644
--- a/lld/test/ELF/lto/thinlto-cant-write-index.ll
+++ b/lld/test/ELF/lto/thinlto-cant-write-index.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 
 ; Basic ThinLTO tests.
 ; RUN: opt -module-summary %s -o %t1.o
diff --git a/lld/test/ELF/lto/thinlto-emit-imports.ll b/lld/test/ELF/lto/thinlto-emit-imports.ll
index 6d0e1e65047db4..253ec08619c982 100644
--- a/lld/test/ELF/lto/thinlto-emit-imports.ll
+++ b/lld/test/ELF/lto/thinlto-emit-imports.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 ;; Test a few properties not tested by thinlto-index-only.ll
 
 ; RUN: opt -module-summary %s -o %t1.o
diff --git a/lld/test/ELF/systemz-got.s b/lld/test/ELF/systemz-got.s
new file mode 100644
index 00000000000000..1d558aa3b02905
--- /dev/null
+++ b/lld/test/ELF/systemz-got.s
@@ -0,0 +1,16 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/shared.s -o %t2.o
+# RUN: ld.lld -shared %t2.o -soname=%t2.so -o %t2.so
+
+# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o %t2.so -o %t
+# RUN: llvm-readelf -S -r  %t | FileCheck %s
+
+# CHECK: .got              PROGBITS        {{.*}} {{.*}} 000020 00  WA  0   0  8
+
+# CHECK: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries:
+# CHECK: {{.*}}  000000010000000a R_390_GLOB_DAT         0000000000000000 bar + 0
+
+.global _start
+_start:
+	lgrl  %r1,bar@GOT
diff --git a/lld/test/ELF/systemz-gotent-relax-align.s b/lld/test/ELF/systemz-gotent-relax-align.s
new file mode 100644
index 00000000000000..c6326086f56db0
--- /dev/null
+++ b/lld/test/ELF/systemz-gotent-relax-align.s
@@ -0,0 +1,48 @@
+# REQUIRES: systemz
+## Verify that R_390_GOTENT optimization is not performed on misaligned symbols.
+
+# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t1
+# RUN: llvm-readelf -S -r -x .got -x .got.plt %t1 | FileCheck --check-prefixes=CHECK %s
+# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s
+
+## We retain one .got entry for the unaligned symbol.
+# CHECK:      Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK:      .got              PROGBITS        00000000010021e0 0001e0 000020 00  WA  0   0  8
+# CHECK-NEXT: .relro_padding    NOBITS          0000000001002200 000200 000e00 00  WA  0   0  1
+# CHECK-NEXT: .data             PROGBITS        0000000001003200 000200 000006 00  WA  0   0  2
+
+# CHECK-LABEL: Hex dump of section '.got':
+# CHECK-NEXT:    0x010021e0 00000000 00000000 00000000 00000000
+# CHECK-NEXT:    0x010021f0 00000000 00000000 00000000 01003205
+
+# DISASM:      Disassembly of section .text:
+# DISASM:      <_start>:
+# DISASM-NEXT:   larl    %r1, 0x1003200
+# DISASM-NEXT:   larl    %r1, 0x1003200
+# DISASM-NEXT:   lgrl    %r1, 0x10021f8
+# DISASM-NEXT:   lgrl    %r1, 0x10021f8
+
+.data
+.globl var_align
+.hidden var_align
+ .align 2
+var_align:
+ .long 0
+
+.data
+.globl var_unalign
+.hidden var_unalign
+ .align 2
+ .byte 0
+var_unalign:
+ .byte 0
+
+.text
+.globl _start
+.type _start, @function
+_start:
+ lgrl %r1, var_align@GOT
+ lgrl %r1, var_align@GOT
+ lgrl %r1, var_unalign@GOT
+ lgrl %r1, var_unalign@GOT
diff --git a/lld/test/ELF/systemz-gotent-relax-und-dso.s b/lld/test/ELF/systemz-gotent-relax-und-dso.s
new file mode 100644
index 00000000000000..57369a417fd445
--- /dev/null
+++ b/lld/test/ELF/systemz-gotent-relax-und-dso.s
@@ -0,0 +1,68 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %S/Inputs/gotpc-relax-und-dso.s -o %tdso.o
+# RUN: ld.lld -shared %tdso.o -soname=t.so -o %t.so
+# RUN: ld.lld --hash-style=sysv -shared %t.o %t.so -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=RELOC %s
+# RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck --check-prefix=DISASM %s
+
+# RELOC-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 3 entries:
+# RELOC: 00000000000023f8 000000010000000a R_390_GLOB_DAT 00000000000012d8 foo + 0
+# RELOC: 0000000000002400 000000030000000a R_390_GLOB_DAT 0000000000000000 und + 0
+# RELOC: 0000000000002408 000000040000000a R_390_GLOB_DAT 0000000000000000 dsofoo + 0
+
+# DISASM:      Disassembly of section .text:
+# DISASM-EMPTY:
+# DISASM-NEXT: <foo>:
+# DISASM-NEXT:     bc 0, 0
+# DISASM:      <hid>:
+# DISASM-NEXT:     bc 0, 0
+# DISASM:      <_start>:
+# DISASM-NEXT:    lgrl    %r1, 0x2400
+# DISASM-NEXT:    lgrl    %r1, 0x2400
+# DISASM-NEXT:    lgrl    %r1, 0x2408
+# DISASM-NEXT:    lgrl    %r1, 0x2408
+# DISASM-NEXT:    larl    %r1, 0x12dc
+# DISASM-NEXT:    larl    %r1, 0x12dc
+# DISASM-NEXT:    lgrl    %r1, 0x23f8
+# DISASM-NEXT:    lgrl    %r1, 0x23f8
+# DISASM-NEXT:    lgrl    %r1, 0x2400
+# DISASM-NEXT:    lgrl    %r1, 0x2400
+# DISASM-NEXT:    lgrl    %r1, 0x2408
+# DISASM-NEXT:    lgrl    %r1, 0x2408
+# DISASM-NEXT:    larl    %r1, 0x12dc
+# DISASM-NEXT:    larl    %r1, 0x12dc
+# DISASM-NEXT:    lgrl    %r1, 0x23f8
+# DISASM-NEXT:    lgrl    %r1, 0x23f8
+
+.text
+.globl foo
+.type foo, @function
+foo:
+ nop
+
+.globl hid
+.hidden hid
+.type hid, @function
+hid:
+ nop
+
+.globl _start
+.type _start, @function
+_start:
+ lgrl %r1, und@GOT
+ lgrl %r1, und@GOT
+ lgrl %r1, dsofoo@GOT
+ lgrl %r1, dsofoo@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, und@GOT
+ lgrl %r1, und@GOT
+ lgrl %r1, dsofoo@GOT
+ lgrl %r1, dsofoo@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, foo@GOT
diff --git a/lld/test/ELF/systemz-gotent-relax.s b/lld/test/ELF/systemz-gotent-relax.s
new file mode 100644
index 00000000000000..f665e1af9e53d2
--- /dev/null
+++ b/lld/test/ELF/systemz-gotent-relax.s
@@ -0,0 +1,91 @@
+# REQUIRES: systemz
+## Test R_390_GOTENT optimization.
+
+# RUN: llvm-mc -filetype=obj -relax-relocations -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t1 --no-apply-dynamic-relocs
+# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,NOAPPLY %s
+# RUN: ld.lld %t.o -o %t1 --apply-dynamic-relocs
+# RUN: llvm-readelf -S -r -x .got.plt %t1 | FileCheck --check-prefixes=CHECK,APPLY %s
+# RUN: ld.lld %t.o -o %t1
+# RUN: llvm-objdump --no-print-imm-hex -d %t1 | FileCheck --check-prefix=DISASM %s
+
+## --no-relax disables GOT optimization.
+# RUN: ld.lld --no-relax %t.o -o %t2
+# RUN: llvm-objdump --no-print-imm-hex -d %t2 | FileCheck --check-prefix=NORELAX %s
+
+## In our implementation, .got is retained even if all GOT-generating relocations are optimized.
+# CHECK:      Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK:      .iplt             PROGBITS        0000000001001240 000240 000020 00  AX  0   0 16
+# CHECK-NEXT: .got              PROGBITS        0000000001002260 000260 000018 00  WA  0   0  8
+# CHECK-NEXT: .relro_padding    NOBITS          0000000001002278 000278 000d88 00  WA  0   0  1
+# CHECK-NEXT: .got.plt          PROGBITS        0000000001003278 000278 000008 00  WA  0   0  8
+
+## There is one R_S390_IRELATIVE relocation.
+# CHECK-LABEL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries:
+# CHECK:       0000000001003278  000000000000003d R_390_IRELATIVE                   10011e8
+
+# CHECK-LABEL: Hex dump of section '.got.plt':
+# NOAPPLY-NEXT:  0x01003278 00000000 00000000
+# APPLY-NEXT:    0x01003278 00000000 010011e8
+
+# DISASM:      Disassembly of section .text:
+# DISASM: 00000000010011e0 <foo>:
+# DISASM-NEXT:   bc      0, 0
+# DISASM: 00000000010011e4 <hid>:
+# DISASM-NEXT:   bc      0, 0
+# DISASM: 00000000010011e8 <ifunc>:
+# DISASM-NEXT:   br      %r14
+# DISASM: 00000000010011ea <_start>:
+# DISASM-NEXT:   larl    %r1, 0x10011e0
+# DISASM-NEXT:   larl    %r1, 0x10011e0
+# DISASM-NEXT:   larl    %r1, 0x10011e4
+# DISASM-NEXT:   larl    %r1, 0x10011e4
+# DISASM-NEXT:   lgrl    %r1, 0x1003278
+# DISASM-NEXT:   lgrl    %r1, 0x1003278
+# DISASM-NEXT:   larl    %r1, 0x10011e0
+# DISASM-NEXT:   larl    %r1, 0x10011e0
+# DISASM-NEXT:   larl    %r1, 0x10011e4
+# DISASM-NEXT:   larl    %r1, 0x10011e4
+# DISASM-NEXT:   lgrl    %r1, 0x1003278
+# DISASM-NEXT:   lgrl    %r1, 0x1003278
+
+# NORELAX-LABEL: <_start>:
+# NORELAX-COUNT-12: lgrl
+
+.text
+.globl foo
+
+.text
+.globl foo
+.type foo, @function
+foo:
+ nop
+
+.globl hid
+.hidden hid
+.type hid, @function
+hid:
+ nop
+
+.text
+.type ifunc STT_GNU_IFUNC
+.globl ifunc
+.type ifunc, @function
+ifunc:
+ br %r14
+
+.globl _start
+.type _start, @function
+_start:
+ lgrl %r1, foo@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, ifunc@GOT
+ lgrl %r1, ifunc@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, foo@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, hid@GOT
+ lgrl %r1, ifunc@GOT
+ lgrl %r1, ifunc@GOT
diff --git a/lld/test/ELF/systemz-ifunc-nonpreemptible.s b/lld/test/ELF/systemz-ifunc-nonpreemptible.s
new file mode 100644
index 00000000000000..5056db302ca1c7
--- /dev/null
+++ b/lld/test/ELF/systemz-ifunc-nonpreemptible.s
@@ -0,0 +1,75 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-none-linux-gnu %s -o %t.o
+# RUN: ld.lld -static %t.o -o %t
+# RUN: ld.lld -static %t.o -o %t.apply --apply-dynamic-relocs
+# RUN: llvm-readelf --section-headers --relocations --symbols %t | FileCheck %s
+# RUN: llvm-readelf -x .got.plt %t | FileCheck %s --check-prefix=NO-APPLY-RELOC
+# RUN: llvm-readelf -x .got.plt %t.apply | FileCheck %s --check-prefix=APPLY-RELOC
+# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t | FileCheck %s --check-prefix=DISASM
+
+# CHECK:      Section Headers:
+# CHECK-NEXT:  [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK-NEXT:  [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
+# CHECK-NEXT:  [ 1] .rela.dyn         RELA            0000000001000158 000158 000030 18  AI  0   4  8
+# CHECK-NEXT:  [ 2] .text             PROGBITS        0000000001001188 000188 00001c 00  AX  0   0  4
+# CHECK-NEXT:  [ 3] .iplt             PROGBITS        00000000010011b0 0001b0 000040 00  AX  0   0 16
+# CHECK-NEXT:  [ 4] .got.plt          PROGBITS        00000000010021f0 0001f0 000010 00  WA  0   0  8
+
+# CHECK:      Relocation section '.rela.dyn' at offset 0x158 contains 2 entries:
+# CHECK-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# CHECK-NEXT: 00000000010021f0  000000000000003d R_390_IRELATIVE                   1001188
+# CHECK-NEXT: 00000000010021f8  000000000000003d R_390_IRELATIVE                   100118a
+
+# CHECK:      Symbol table '.symtab' contains 6 entries:
+# CHECK-NEXT:   Num:    Value          Size Type    Bind   Vis       Ndx Name
+# CHECK-NEXT:     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+# CHECK-NEXT:     1: 0000000001000158     0 NOTYPE  LOCAL  HIDDEN      1 __rela_iplt_start
+# CHECK-NEXT:     2: 0000000001000188     0 NOTYPE  LOCAL  HIDDEN      1 __rela_iplt_end
+# CHECK-NEXT:     3: 0000000001001188     0 IFUNC   GLOBAL DEFAULT     2 foo
+# CHECK-NEXT:     4: 000000000100118a     0 IFUNC   GLOBAL DEFAULT     2 bar
+# CHECK-NEXT:     5: 000000000100118c     0 NOTYPE  GLOBAL DEFAULT     2 _start
+
+# NO-APPLY-RELOC-LABEL:  Hex dump of section '.got.plt':
+# NO-APPLY-RELOC-NEXT:     0x010021f0 00000000 00000000 00000000 00000000
+# NO-APPLY-RELOC-EMPTY:
+
+# APPLY-RELOC-LABEL:  Hex dump of section '.got.plt':
+# APPLY-RELOC-NEXT:     0x010021f0 00000000 01001188 00000000 0100118a
+# APPLY-RELOC-EMPTY:
+
+# DISASM: Disassembly of section .text:
+# DISASM: 0000000001001188 <foo>:
+# DISASM-NEXT:  br      %r14
+# DISASM: 000000000100118a <bar>:
+# DISASM-NEXT:  br      %r14
+# DISASM: 000000000100118c  <_start>:
+# DISASM-NEXT:  brasl   %r14, 0x10011b0
+# DISASM-NEXT:  brasl   %r14, 0x10011d0
+# DISASM-NEXT:  larl    %r2, 0x1000158
+# DISASM-NEXT:  larl    %r2, 0x1000188
+# DISASM: Disassembly of section .iplt:
+# DISASM: <.iplt>:
+# DISASM:        10011b0:       larl    %r1, 0x10021f0
+# DISASM-NEXT:   10011b6:       lg      %r1, 0(%r1)
+# DISASM-NEXT:   10011bc:       br      %r1
+# DISASM:        10011d0:       larl    %r1, 0x10021f8
+# DISASM-NEXT:   10011d6:       lg      %r1, 0(%r1)
+# DISASM-NEXT:   10011dc:       br      %r1
+
+.text
+.type foo STT_GNU_IFUNC
+.globl foo
+foo:
+ br %r14
+
+.type bar STT_GNU_IFUNC
+.globl bar
+bar:
+ br %r14
+
+.globl _start
+_start:
+ brasl %r14, foo@plt
+ brasl %r14, bar@plt
+ larl %r2, __rela_iplt_start
+ larl %r2, __rela_iplt_end
diff --git a/lld/test/ELF/systemz-init-padding.s b/lld/test/ELF/systemz-init-padding.s
new file mode 100644
index 00000000000000..c56b98d43f1b0e
--- /dev/null
+++ b/lld/test/ELF/systemz-init-padding.s
@@ -0,0 +1,27 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %p/Inputs/systemz-init.s -o systemz-init.o
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld -dynamic-linker /lib/ld64.so.1 %t.o systemz-init.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn -j .init %t | FileCheck %s
+
+# glibc < 2.39 used to align .init and .fini code at a 4-byte boundary.
+# When that happens, the linker must not pad the code with invalid
+# instructions, e.g. null bytes.
+	.section        .init,"ax",@progbits
+	brasl %r14, startup
+
+# CHECK:      <.init>:
+# CHECK-NEXT: brasl %r14,
+# CHECK-NEXT: bcr     0, %r7
+# CHECK-NEXT: lg %r4, 272(%r15)
+
+	.text
+	.globl startup
+	.p2align 4
+startup:
+	br %r14
+
+	.globl main
+	.p2align 4
+main:
+	br %r14
diff --git a/lld/test/ELF/systemz-pie.s b/lld/test/ELF/systemz-pie.s
new file mode 100644
index 00000000000000..bb971a82fb8ced
--- /dev/null
+++ b/lld/test/ELF/systemz-pie.s
@@ -0,0 +1,38 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t1.o
+
+## Check -pie.
+# RUN: ld.lld -pie %t1.o -o %t
+# RUN: llvm-readelf --file-headers --program-headers --dynamic %t | FileCheck %s
+
+# CHECK: ELF Header:
+# CHECK-NEXT:  Magic:   7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00
+# CHECK-NEXT:  Class:                             ELF64
+# CHECK-NEXT:  Data:                              2's complement, big endian
+# CHECK-NEXT:  Version:                           1 (current)
+# CHECK-NEXT:  OS/ABI:                            UNIX - System V
+# CHECK-NEXT:  ABI Version:                       0
+# CHECK-NEXT:  Type:                              DYN (Shared object file)
+# CHECK-NEXT:  Machine:                           IBM S/390
+# CHECK-NEXT:  Version:                           0x1
+
+# CHECK: Program Headers:
+# CHECK-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+# CHECK-NEXT:  PHDR           0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R   0x8
+# CHECK-NEXT:  LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x00020d 0x00020d R   0x1000
+# CHECK-NEXT:  LOAD           0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 RW  0x1000
+# CHECK-NEXT:  DYNAMIC        0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000090 RW  0x8
+# CHECK-NEXT:  GNU_RELRO      0x000210 0x0000000000002210 0x0000000000002210 0x000090 0x000df0 R   0x1
+# CHECK-NEXT:  GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x0
+
+# CHECK: Dynamic section at offset 0x210 contains 9 entries:
+# CHECK-NEXT:   Tag                Type       Name/Value
+# CHECK-NEXT:   0x000000006ffffffb (FLAGS_1)  PIE
+
+## Check -nopie
+# RUN: ld.lld -no-pie %t1.o -o %t2
+# RUN: llvm-readelf --file-headers %t2 | FileCheck %s --check-prefix=NOPIE
+# NOPIE-NOT: Type: DYN
+
+.globl _start
+_start:
diff --git a/lld/test/ELF/systemz-plt.s b/lld/test/ELF/systemz-plt.s
new file mode 100644
index 00000000000000..4669f01f588121
--- /dev/null
+++ b/lld/test/ELF/systemz-plt.s
@@ -0,0 +1,83 @@
+# REQUIRES: systemz
+# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s
+
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t1.s -o %t1.o
+# RUN: ld.lld -shared %t1.o -soname=t1.so -o %t1.so
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o %t1.so -z separate-code -o %t
+# RUN: llvm-readelf -S -s -r -x .got.plt %t | FileCheck %s
+# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=DIS %s
+
+# CHECK: Section Headers:
+# CHECK: .plt     PROGBITS 0000000001001020 001020 000060 00  AX  0   0 16
+# CHECK: .got     PROGBITS 00000000010020d0 0020d0 000018 00  WA  0   0  8
+# CHECK: .got.plt PROGBITS 00000000010030e8 0020e8 000010 00  WA  0   0  8
+
+# CHECK: Relocation section '.rela.plt' at offset {{.*}} contains 2 entries:
+# CHECK: 00000000010030e8 000000010000000b R_390_JMP_SLOT 0000000000000000 bar + 0
+# CHECK: 00000000010030f0 000000020000000b R_390_JMP_SLOT 0000000000000000 weak + 0
+
+## A canonical PLT has a non-zero st_value. bar and weak are called but their
+## addresses are not taken, so a canonical PLT is not necessary.
+# CHECK: Symbol table '.dynsym' contains 3 entries:
+# CHECK-NEXT:   Num:    Value          Size Type    Bind   Vis       Ndx Name
+# CHECK-NEXT:     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+# CHECK-NEXT:     1: 0000000000000000     0 FUNC    GLOBAL DEFAULT   UND bar
+# CHECK-NEXT:     2: 0000000000000000     0 FUNC    WEAK   DEFAULT   UND weak
+
+## The .got.plt slots relocated by .rela.plt point to .plt
+## This is required by glibc.
+# CHECK: Hex dump of section '.got.plt':
+# CHECK-NEXT: 0x010030e8 00000000 0100104e 00000000 0100106e
+
+# DIS: Disassembly of section .text:
+
+# DIS: 0000000001001000 <_start>:
+# DIS-NEXT: brasl	%r14, 0x1001012
+# DIS-NEXT: brasl	%r14, 0x1001040
+# DIS-NEXT: brasl	%r14, 0x1001060
+
+# DIS: 0000000001001012 <foo>:
+# DIS-NEXT: br	%r14
+
+# DIS: Disassembly of section .plt:
+
+# DIS: 0000000001001020 <.plt>:
+# DIS-NEXT: 1001020: e3 10 f0 38 00 24    	stg	%r1, 56(%r15)
+# DIS-NEXT: 1001026: c0 10 00 00 08 55          larl	%r1, 0x10020d0
+# DIS-NEXT: 100102c: d2 07 f0 30 10 08    	mvc	48(8,%r15), 8(%r1)
+# DIS-NEXT: 1001032: e3 10 10 10 00 04    	lg	%r1, 16(%r1)
+# DIS-NEXT: 1001038: 07 f1        	br	%r1
+# DIS-NEXT: 100103a: 07 00        	bcr	0, %r0
+# DIS-NEXT: 100103c: 07 00        	bcr	0, %r0
+# DIS-NEXT: 100103e: 07 00        	bcr	0, %r0
+# DIS-NEXT: 1001040: c0 10 00 00 10 54    	larl	%r1, 0x10030e8
+# DIS-NEXT: 1001046: e3 10 10 00 00 04    	lg	%r1, 0(%r1)
+# DIS-NEXT: 100104c: 07 f1        	br	%r1
+# DIS-NEXT: 100104e: 0d 10        	basr	%r1, 0
+# DIS-NEXT: 1001050: e3 10 10 0c 00 14    	lgf	%r1, 12(%r1)
+# DIS-NEXT: 1001056: c0 f4 ff ff ff e5    	jg	0x1001020
+# DIS-NEXT: 100105c: 00 00        	<unknown>
+# DIS-NEXT: 100105e: 00 00        	<unknown>
+# DIS-NEXT: 1001060: c0 10 00 00 10 48    	larl	%r1, 0x10030f0
+# DIS-NEXT: 1001066: e3 10 10 00 00 04    	lg	%r1, 0(%r1)
+# DIS-NEXT: 100106c: 07 f1        	br	%r1
+# DIS-NEXT: 100106e: 0d 10        	basr	%r1, 0
+# DIS-NEXT: 1001070: e3 10 10 0c 00 14    	lgf	%r1, 12(%r1)
+# DIS-NEXT: 1001076: c0 f4 ff ff ff d5    	jg	0x1001020
+# DIS-NEXT: 100107c: 00 00        	<unknown>
+# DIS-NEXT: 100107e: 00 18        	<unknown>
+
+.global _start, foo, bar
+.weak weak
+
+_start:
+  ## Use @plt to avoid generating direct references that would force
+  ## allocation of a canonical PLT entry.
+  brasl %r14, foo@plt
+  brasl %r14, bar@plt
+  brasl %r14, weak@plt
+
+## foo is local and non-preemptable, no PLT is generated.
+foo:
+  br %r14
diff --git a/lld/test/ELF/systemz-reloc-abs.s b/lld/test/ELF/systemz-reloc-abs.s
new file mode 100644
index 00000000000000..b5ad94d90d3a96
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-abs.s
@@ -0,0 +1,32 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs255.s -o %t255.o
+# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs256.s -o %t256.o
+# RUN: llvm-mc -filetype=obj -triple=s390x %S/Inputs/abs257.s -o %t257.o
+
+# RUN: ld.lld %t.o %t256.o -o %t
+# RUN: llvm-readelf -x .data %t | FileCheck %s
+# CHECK: 0x{{[0-9a-f]+}} ff80ffff 8000ffff ffff8000 0000ffff
+# CHECK-NEXT:            ffffffff ffff8000 00000000 0000
+
+# RUN: not ld.lld %t.o %t255.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW1 %s
+# OVERFLOW1: relocation R_390_8 out of range: -129 is not in [-128, 255]
+# OVERFLOW1: relocation R_390_16 out of range: -32769 is not in [-32768, 65535]
+# OVERFLOW1: relocation R_390_32 out of range: -2147483649 is not in [-2147483648, 4294967295]
+
+# RUN: not ld.lld %t.o %t257.o -o /dev/null 2>&1 | FileCheck --check-prefix=OVERFLOW2 %s
+# OVERFLOW2: relocation R_390_8 out of range: 256 is not in [-128, 255]
+# OVERFLOW2: relocation R_390_16 out of range: 65536 is not in [-32768, 65535]
+# OVERFLOW2: relocation R_390_32 out of range: 4294967296 is not in [-2147483648, 4294967295]
+
+.globl _start
+_start:
+.data
+.byte foo - 1
+.byte foo - 384
+.word foo + 0xfeff
+.word foo - 0x8100
+.long foo + 0xfffffeff
+.long foo - 0x80000100
+.quad foo + 0xfffffffffffffeff
+.quad foo - 0x8000000000000100
diff --git a/lld/test/ELF/systemz-reloc-disp12.s b/lld/test/ELF/systemz-reloc-disp12.s
new file mode 100644
index 00000000000000..3d32707d149fe7
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-disp12.s
@@ -0,0 +1,21 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=291 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4095 %s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=4096 %s -o %t3.o
+
+# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out
+# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out
+# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE
+
+# RANGE: relocation R_390_12 out of range: 4096 is not in [0, 4095]
+
+# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN=58678123 --check-prefix DUMP
+# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN=58678fff --check-prefix DUMP
+
+# DUMP:  0x00000000 [[INSN]]
+
+.text
+.globl _start
+_start:
+    .reloc .+2, R_390_12, DISP
+    l %r6, 0(%r7,%r8)
diff --git a/lld/test/ELF/systemz-reloc-disp20.s b/lld/test/ELF/systemz-reloc-disp20.s
new file mode 100644
index 00000000000000..88cd657c6ae3cb
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-disp20.s
@@ -0,0 +1,21 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=74565 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524287 %s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym DISP=524288 %s -o %t3.o
+
+# RUN: ld.lld --section-start=.text=0x0 %t1.o -o %t1out
+# RUN: ld.lld --section-start=.text=0x0 %t2.o -o %t2out
+# RUN: not ld.lld --section-start=.text=0x0 %t3.o -o /dev/null 2>&1 | FileCheck %s --check-prefix RANGE
+
+# RANGE: relocation R_390_20 out of range: 524288 is not in [-524288, 524287]
+
+# RUN: llvm-readelf --hex-dump=.text %t1out | FileCheck %s -DINSN="e3678345 1204" --check-prefix DUMP
+# RUN: llvm-readelf --hex-dump=.text %t2out | FileCheck %s -DINSN="e3678fff 7f04" --check-prefix DUMP
+
+# DUMP:  0x00000000 [[INSN]]
+
+.text
+.globl _start
+_start:
+    .reloc .+2, R_390_20, DISP
+    lg %r6, 0(%r7,%r8)
diff --git a/lld/test/ELF/systemz-reloc-got.s b/lld/test/ELF/systemz-reloc-got.s
new file mode 100644
index 00000000000000..4b9ac16481f4cb
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-got.s
@@ -0,0 +1,92 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld -z norelro -shared %t.o -soname=t.so -o %t.so
+## Note: Without norelro the distance between .got and .got.plt causes
+## R_390_GOTPLT12 relocations to always overflow.
+
+# RUN: llvm-readelf -S -x .data %t.so | FileCheck %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s --check-prefix=DISASM
+
+# CHECK: Section Headers:
+# CHECK: .got PROGBITS 0000000000002458
+# CHECK: .got.plt PROGBITS 0000000000002480
+
+## Note: _GLOBAL_OFFSET_TABLE is at .got
+## GOT (foo) is at .got + 24 == 0x2470
+## GOT (bar) is at .got + 32 == 0x2478
+## GOTPLT (foo) is at .got.plt + 0 == .got + 40 == 0x2480
+## GOTPLT (bar) is at .got.plt + 8 == .got + 48 == 0x2488
+
+# DISASM:      larl %r12, 0x2458
+# DISASM-NEXT: larl %r1, 0x2470
+# DISASM-NEXT: larl %r1, 0x2478
+# DISASM-NEXT: larl %r1, 0x2480
+# DISASM-NEXT: larl %r1, 0x2488
+
+# DISASM-NEXT: l %r1, 24(%r12)
+# DISASM-NEXT: l %r1, 32(%r12)
+# DISASM-NEXT: l %r1, 40(%r12)
+# DISASM-NEXT: l %r1, 48(%r12)
+# DISASM-NEXT: lg %r1, 24(%r12)
+# DISASM-NEXT: lg %r1, 32(%r12)
+# DISASM-NEXT: lg %r1, 40(%r12)
+# DISASM-NEXT: lg %r1, 48(%r12)
+
+# CHECK: Hex dump of section '.data':
+# CHECK-NEXT: 00180020 00280030 00000018 00000020
+# CHECK-NEXT: 00000028 00000030 00000000 00000018
+# CHECK-NEXT: 00000000 00000020 00000000 00000028
+# CHECK-NEXT: 00000000 00000030
+
+.text
+larl %r12, _GLOBAL_OFFSET_TABLE_
+.reloc .+2, R_390_GOTENT, foo+2
+larl %r1, 0
+.reloc .+2, R_390_GOTENT, bar+2
+larl %r1, 0
+.reloc .+2, R_390_GOTPLTENT, foo+2
+larl %r1, 0
+.reloc .+2, R_390_GOTPLTENT, bar+2
+larl %r1, 0
+.reloc .+2, R_390_GOT12, foo
+l %r1, 0(%r12)
+.reloc .+2, R_390_GOT12, bar
+l %r1, 0(%r12)
+.reloc .+2, R_390_GOTPLT12, foo
+l %r1, 0(%r12)
+.reloc .+2, R_390_GOTPLT12, bar
+l %r1, 0(%r12)
+.reloc .+2, R_390_GOT20, foo
+lg %r1, 0(%r12)
+.reloc .+2, R_390_GOT20, bar
+lg %r1, 0(%r12)
+.reloc .+2, R_390_GOTPLT20, foo
+lg %r1, 0(%r12)
+.reloc .+2, R_390_GOTPLT20, bar
+lg %r1, 0(%r12)
+
+.data
+.reloc ., R_390_GOT16, foo
+.space 2
+.reloc ., R_390_GOT16, bar
+.space 2
+.reloc ., R_390_GOTPLT16, foo
+.space 2
+.reloc ., R_390_GOTPLT16, bar
+.space 2
+.reloc ., R_390_GOT32, foo
+.space 4
+.reloc ., R_390_GOT32, bar
+.space 4
+.reloc ., R_390_GOTPLT32, foo
+.space 4
+.reloc ., R_390_GOTPLT32, bar
+.space 4
+.reloc ., R_390_GOT64, foo
+.space 8
+.reloc ., R_390_GOT64, bar
+.space 8
+.reloc ., R_390_GOTPLT64, foo
+.space 8
+.reloc ., R_390_GOTPLT64, bar
+.space 8
diff --git a/lld/test/ELF/systemz-reloc-gotrel.s b/lld/test/ELF/systemz-reloc-gotrel.s
new file mode 100644
index 00000000000000..46669ecfa7fd01
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-gotrel.s
@@ -0,0 +1,36 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: ld.lld -shared %t.o -soname=t.so -o %t.so
+
+# RUN: llvm-readelf -S -s -x .data %t.so | FileCheck %s
+
+# CHECK: Section Headers:
+# CHECK: .plt PROGBITS 0000000000001290
+# CHECK: .got PROGBITS 0000000000002390
+
+# CHECK: Symbol table '.symtab'
+# CHECK: 0000000000001288 {{.*}}  bar
+
+## Note: foo is the first (and only) PLT entry, which resides at .plt + 32
+## PLTOFF (foo) is (.plt + 32) - .got == 0x12b0 - 0x2390 == 0xffffef20
+## GOTOFF (bar) is bar - .got == 0x1288 - 0x2390 == 0xffffeef8
+# CHECK: Hex dump of section '.data':
+# CHECK-NEXT: eef8ef20 ffffeef8 ffffef20 ffffffff
+# CHECK-NEXT: ffffeef8 ffffffff ffffef20
+
+bar:
+  br %r14
+
+.data
+.reloc ., R_390_GOTOFF16, bar
+.space 2
+.reloc ., R_390_PLTOFF16, foo
+.space 2
+.reloc ., R_390_GOTOFF, bar
+.space 4
+.reloc ., R_390_PLTOFF32, foo
+.space 4
+.reloc ., R_390_GOTOFF64, bar
+.space 8
+.reloc ., R_390_PLTOFF64, foo
+.space 8
diff --git a/lld/test/ELF/systemz-reloc-pc16.s b/lld/test/ELF/systemz-reloc-pc16.s
new file mode 100644
index 00000000000000..e1dad5af239d45
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-pc16.s
@@ -0,0 +1,39 @@
+# REQUIRES: systemz
+# RUN: rm -rf %t && split-file %s %t
+
+## Check recompile with -fPIC error message
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o
+# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: relocation R_390_PC16 cannot be used against symbol '_shared'; recompile with -fPIC
+# CHECK: >>> defined in {{.*}}
+# CHECK: >>> referenced by {{.*}}:(.data+0x1)
+
+## Check patching of negative addends
+
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32768 %t/addend.s -o %t/2.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=32769 %t/addend.s -o %t/3.o
+
+# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out
+# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out
+# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE
+
+# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC16 out of range
+
+# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffff --check-prefix DUMP
+# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=8000 --check-prefix DUMP
+
+# DUMP:  0x00000000 [[ADDEND]]
+
+#--- shared.s
+.data
+ .byte 0xe8
+ .word _shared - .
+
+#--- addend.s
+.text
+.globl _start
+_start:
+    .reloc ., R_390_PC16, .text-ADDEND
+    .space 2
diff --git a/lld/test/ELF/systemz-reloc-pc32.s b/lld/test/ELF/systemz-reloc-pc32.s
new file mode 100644
index 00000000000000..0cb9322eb1c1b9
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-pc32.s
@@ -0,0 +1,39 @@
+# REQUIRES: systemz
+# RUN: rm -rf %t && split-file %s %t
+
+## Check recompile with -fPIC error message
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %t/shared.s -o %t/shared.o
+# RUN: not ld.lld -shared %t/shared.o -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: relocation R_390_PC32 cannot be used against symbol '_shared'; recompile with -fPIC
+# CHECK: >>> defined in {{.*}}
+# CHECK: >>> referenced by {{.*}}:(.data+0x1)
+
+## Check patching of negative addends
+
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=1 %t/addend.s -o %t/1.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483648 %t/addend.s -o %t/2.o
+# RUN: llvm-mc -filetype=obj -triple=s390x -defsym ADDEND=2147483649 %t/addend.s -o %t/3.o
+
+# RUN: ld.lld --section-start=.text=0x0 %t/1.o -o %t/1out
+# RUN: ld.lld --section-start=.text=0x0 %t/2.o -o %t/2out
+# RUN: not ld.lld --section-start=.text=0x0 %t/3.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/3.o --check-prefix RANGE
+
+# RANGE: error: [[FILE]]:(.text+0x0): relocation R_390_PC32 out of range
+
+# RUN: llvm-readelf --hex-dump=.text %t/1out | FileCheck %s -DADDEND=ffffffff --check-prefix DUMP
+# RUN: llvm-readelf --hex-dump=.text %t/2out | FileCheck %s -DADDEND=80000000 --check-prefix DUMP
+
+# DUMP:  0x00000000 [[ADDEND]]
+
+#--- shared.s
+.data
+ .byte 0xe8
+ .long _shared - .
+
+#--- addend.s
+.text
+.globl _start
+_start:
+    .reloc ., R_390_PC32, .text-ADDEND
+    .space 4
diff --git a/lld/test/ELF/systemz-reloc-pcdbl.s b/lld/test/ELF/systemz-reloc-pcdbl.s
new file mode 100644
index 00000000000000..faee756f5e95b4
--- /dev/null
+++ b/lld/test/ELF/systemz-reloc-pcdbl.s
@@ -0,0 +1,68 @@
+# REQUIRES: systemz
+
+# RUN: llvm-mc --filetype=obj --triple=s390x-unknown-linux -mcpu=z13 %s -o %t.o
+
+# RUN: ld.lld %t.o --defsym foo16=pc16dbl+4 --defsym bar16=pc16dbl --defsym foo32=pc32dbl+6 --defsym bar32=pc32dbl --defsym foo12=pc12dbl+6 --defsym bar12=pc12dbl --defsym foo24=pc24dbl+6 --defsym bar24=pc24dbl -o %t
+# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t | FileCheck %s --check-prefix=CHECK
+# CHECK: 0000000001001120 <pc16dbl>:
+# CHECK: je      0x1001124
+# CHECK: jne     0x1001120
+# CHECK: 0000000001001128 <pc32dbl>:
+# CHECK: jge     0x100112e
+# CHECK: jgne    0x1001128
+# CHECK: 0000000001001134 <pc12dbl>:
+# CHECK: bprp    5, 0x100113a, 0x1001134
+# CHECK: bprp    6, 0x1001134, 0x100113a
+# CHECK: 0000000001001140 <pc24dbl>:
+# CHECK: bprp    5, 0x1001140, 0x1001146
+# CHECK: bprp    6, 0x1001146, 0x1001140
+
+# RUN: ld.lld %t.o --defsym foo16=pc16dbl+0xfffe --defsym bar16=pc16dbl+4-0x10000 --defsym foo32=pc32dbl+0xfffffffe --defsym bar32=pc32dbl+6-0x100000000 --defsym foo12=pc12dbl+0xffe --defsym bar12=pc12dbl+6-0x1000 --defsym foo24=pc24dbl+0xfffffe --defsym bar24=pc24dbl+6-0x1000000 -o %t.limits
+# RUN: llvm-objdump --no-show-raw-insn --mcpu=z13 -d %t.limits | FileCheck %s --check-prefix=LIMITS
+# LIMITS:      je   0x101111e
+# LIMITS-NEXT: jne  0xff1124
+# LIMITS:      jge  0x101001126
+# LIMITS-NEXT: jgne 0xffffffff0100112e
+# LIMITS:      bprp 5, 0x1002132, 0x1001134
+# LIMITS-NEXT: bprp 6, 0x100013a, 0x100113a
+# LIMITS:      bprp 5, 0x1001140, 0x200113e
+# LIMITS-NEXT: bprp 6, 0x1001146, 0x1146
+
+# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+0x10000 --defsym bar16=pc16dbl+4-0x10002 --defsym foo32=pc32dbl+0x100000000 --defsym bar32=pc32dbl+6-0x100000002 --defsym foo12=pc12dbl+0x1000 --defsym bar12=pc12dbl+6-0x1002 --defsym foo24=pc24dbl+0x1000000 --defsym bar24=pc24dbl+6-0x1000002 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-RANGE %s
+# ERROR-RANGE: error: [[FILE]]:(.text+0x2): relocation R_390_PC16DBL out of range: 65536 is not in [-65536, 65535]; references 'foo16'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x6): relocation R_390_PC16DBL out of range: -65538 is not in [-65536, 65535]; references 'bar16'
+# ERROR-RANGE: error: [[FILE]]:(.text+0xa): relocation R_390_PC32DBL out of range: 4294967296 is not in [-4294967296, 4294967295]; references 'foo32'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x10): relocation R_390_PC32DBL out of range: -4294967298 is not in [-4294967296, 4294967295]; references 'bar32'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x15): relocation R_390_PC12DBL out of range: 4096 is not in [-4096, 4095]; references 'foo12'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x1b): relocation R_390_PC12DBL out of range: -4098 is not in [-4096, 4095]; references 'bar12'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x23): relocation R_390_PC24DBL out of range: 16777216 is not in [-16777216, 16777215]; references 'foo24'
+# ERROR-RANGE: error: [[FILE]]:(.text+0x29): relocation R_390_PC24DBL out of range: -16777218 is not in [-16777216, 16777215]; references 'bar24'
+
+# RUN: not ld.lld %t.o --defsym foo16=pc16dbl+1 --defsym bar16=pc16dbl-1 --defsym foo32=pc32dbl+1 --defsym bar32=pc32dbl-1 --defsym foo12=pc12dbl+1 --defsym bar12=pc12dbl-1 --defsym foo24=pc24dbl+1 --defsym bar24=pc24dbl-1 -o /dev/null 2>&1 | FileCheck -DFILE=%t.o --check-prefix=ERROR-ALIGN %s
+# ERROR-ALIGN:      error: [[FILE]]:(.text+0x2): improper alignment for relocation R_390_PC16DBL: 0x1 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x6): improper alignment for relocation R_390_PC16DBL: 0xFFFFFFFFFFFFFFFB is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0xa): improper alignment for relocation R_390_PC32DBL: 0x1 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x10): improper alignment for relocation R_390_PC32DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x15): improper alignment for relocation R_390_PC12DBL: 0x1 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x1b): improper alignment for relocation R_390_PC12DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x23): improper alignment for relocation R_390_PC24DBL: 0x1 is not aligned to 2 bytes
+# ERROR-ALIGN-NEXT: error: [[FILE]]:(.text+0x29): improper alignment for relocation R_390_PC24DBL: 0xFFFFFFFFFFFFFFF9 is not aligned to 2 bytes
+
+.global _start
+.global pc16dbl
+.global pc32dbl
+.global pc12dbl
+.global pc24dbl
+_start:
+pc16dbl:
+     je   foo16
+     jne  bar16
+pc32dbl:
+     jge  foo32
+     jgne bar32
+pc12dbl:
+     bprp 5,foo12,0
+     bprp 6,bar12,0
+pc24dbl:
+     bprp 5,0,foo24
+     bprp 6,0,bar24
diff --git a/lld/test/ELF/systemz-tls-gd.s b/lld/test/ELF/systemz-tls-gd.s
new file mode 100644
index 00000000000000..3976f55a6ae39e
--- /dev/null
+++ b/lld/test/ELF/systemz-tls-gd.s
@@ -0,0 +1,142 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+# RUN: echo '.tbss; .globl b, c; b: .zero 4; c:' | llvm-mc -filetype=obj -triple=s390x-unknown-linux - -o %t1.o
+# RUN: ld.lld -shared -soname=t1.so %t1.o -o %t1.so
+
+# RUN: ld.lld -shared %t.o %t1.o -o %t.so
+# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=GD-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=GD %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=GD-DATA %s
+
+# RUN: ld.lld %t.o %t1.o -o %t.le
+# RUN: llvm-readelf -r %t.le | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.le | FileCheck --check-prefix=LE %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.le | FileCheck --check-prefix=LE-DATA %s
+
+# RUN: ld.lld %t.o %t1.so -o %t.ie
+# RUN: llvm-readelf -r %t.ie | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.ie | FileCheck --check-prefix=IE %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.ie | FileCheck --check-prefix=IE-DATA %s
+
+# GD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 6 entries:
+# GD-REL:      0000000000002570 0000000200000036 R_390_TLS_DTPMOD 0000000000000008 a + 0
+# GD-REL-NEXT: 0000000000002578 0000000200000037 R_390_TLS_DTPOFF 0000000000000008 a + 0
+# GD-REL-NEXT: 0000000000002580 0000000300000036 R_390_TLS_DTPMOD 000000000000000c b + 0
+# GD-REL-NEXT: 0000000000002588 0000000300000037 R_390_TLS_DTPOFF 000000000000000c b + 0
+# GD-REL-NEXT: 0000000000002590 0000000400000036 R_390_TLS_DTPMOD 0000000000000010 c + 0
+# GD-REL-NEXT: 0000000000002598 0000000400000037 R_390_TLS_DTPOFF 0000000000000010 c + 0
+
+## _GLOBAL_OFFSET_TABLE is at 0x2558
+# GD:      larl    %r12, 0x2558
+
+## GOT offset of the TLS module ID / offset pair for a is at 0x2460
+# GD-NEXT: lgrl    %r2, 0x2460
+# GD-NEXT: brasl   %r14, 0x1440
+# GD-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## GOT offset of the TLS module ID / offset pair for b is at 0x2468
+# GD-NEXT: lgrl    %r2, 0x2468
+# GD-NEXT: brasl   %r14, 0x1440
+# GD-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## GOT offset of the TLS module ID / offset pair for c is at 0x2470
+# GD-NEXT: lgrl    %r2, 0x2470
+# GD-NEXT: brasl   %r14, 0x1440
+# GD-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## Constant pool holding GOT offsets of TLS module ID / offset pairs:
+# a: 0x2570 / 0x18
+# b: 0x2580 / 0x28
+# c: 0x2590 / 0x38
+# GD-DATA:      2460 00000000 00000018 00000000 00000028
+# GD-DATA-NEXT: 2470 00000000 00000038
+
+# NOREL: no relocations
+
+## _GLOBAL_OFFSET_TABLE is at 0x1002230
+# LE:      larl    %r12, 0x1002230
+
+## TP offset of a is at 0x1002218
+# LE-NEXT: lgrl    %r2, 0x1002218
+# LE-NEXT: brcl    0,
+# LE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## TP offset of b is at 0x1002220
+# LE-NEXT: lgrl    %r2, 0x1002220
+# LE-NEXT: brcl    0,
+# LE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## TP offset of c is at 0x1002228
+# LE-NEXT: lgrl    %r2, 0x1002228
+# LE-NEXT: brcl    0,
+# LE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## TP offsets
+# a: -8
+# b: -4
+# c: 0
+# LE-DATA:      1002218 ffffffff fffffff8 ffffffff fffffffc
+# LE-DATA-NEXT: 1002228 00000000 00000000
+
+
+# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 2 entries:
+# IE-REL:      0000000001002430 0000000200000038 R_390_TLS_TPOFF 0000000000000000 b + 0
+# IE-REL-NEXT: 0000000001002438 0000000300000038 R_390_TLS_TPOFF 0000000000000000 c + 0
+
+## _GLOBAL_OFFSET_TABLE is at 0x1002418
+# IE:      larl    %r12, 0x1002418
+
+## TP offset of a is at 0x1002340
+# IE-NEXT: lgrl    %r2, 0x1002340
+# IE-NEXT: brcl    0,
+# IE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## GOT offset of the TP offset for b is at 0x1002348
+# IE-NEXT: lgrl    %r2, 0x1002348
+# IE-NEXT: lg      %r2, 0(%r2,%r12)
+# IE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## GOT offset of the TP offset for c is at 0x1002350
+# IE-NEXT: lgrl    %r2, 0x1002350
+# IE-NEXT: lg      %r2, 0(%r2,%r12)
+# IE-NEXT: lgf     %r2, 0(%r2,%r7)
+
+## TP offsets (a) / GOT offset of TP offsets (b, c)
+# a: -4
+# b: 0x1002430 / 0x18
+# c: 0x1002438 / 0x20
+# IE-DATA:      1002340 ffffffff fffffffc 00000000 00000018
+# IE-DATA-NEXT: 1002350 00000000 00000020
+
+
+ear     %r7,%a0
+sllg    %r7,%r1,32
+ear     %r7,%a1
+larl    %r12,_GLOBAL_OFFSET_TABLE_
+
+lgrl    %r2,.LC0
+brasl   %r14,__tls_get_offset@PLT:tls_gdcall:a
+lgf     %r2,0(%r2,%r7)
+
+lgrl    %r2,.LC1
+brasl   %r14,__tls_get_offset@PLT:tls_gdcall:b
+lgf     %r2,0(%r2,%r7)
+
+lgrl    %r2,.LC2
+brasl   %r14,__tls_get_offset@PLT:tls_gdcall:c
+lgf     %r2,0(%r2,%r7)
+
+        .section        .data.rel.ro,"aw"
+        .align  8
+.LC0:
+        .quad   a@TLSGD
+.LC1:
+        .quad   b@TLSGD
+.LC2:
+        .quad   c@TLSGD
+
+	.section .tbss
+	.globl a
+	.zero 8
+a:
+	.zero 4
diff --git a/lld/test/ELF/systemz-tls-ie.s b/lld/test/ELF/systemz-tls-ie.s
new file mode 100644
index 00000000000000..85e2f24cb61f62
--- /dev/null
+++ b/lld/test/ELF/systemz-tls-ie.s
@@ -0,0 +1,121 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=IE %s
+# RUN: llvm-objdump --section .data --full-contents %t.so | FileCheck --check-prefix=IE-DATA %s
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+# RUN: llvm-objdump --section .data --full-contents %t | FileCheck --check-prefix=LE-DATA %s
+# RUN: llvm-objdump --section .got --full-contents %t | FileCheck --check-prefix=LE-GOT %s
+
+## With -pie we still have the R_390_RELATIVE for the data element, but all GOT
+## entries should be fully resolved without any remaining R_390_TLS_TPOFF.
+# RUN: ld.lld -pie %t.o -o %t.pie
+# RUN: llvm-readelf -r %t.pie | FileCheck --check-prefix=PIE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.pie | FileCheck --check-prefix=PIE %s
+# RUN: llvm-objdump --section .data --full-contents %t.pie | FileCheck --check-prefix=PIE-DATA %s
+# RUN: llvm-objdump --section .got --full-contents %t.pie | FileCheck --check-prefix=PIE-GOT %s
+
+# IE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 4 entries:
+# IE-REL: 0000000000003478 000000000000000c R_390_RELATIVE 2460
+# IE-REL: 0000000000002460 0000000100000038 R_390_TLS_TPOFF 0000000000000008 a + 0
+# IE-REL: 0000000000002468 0000000200000038 R_390_TLS_TPOFF 000000000000000c b + 0
+# IE-REL: 0000000000002470 0000000300000038 R_390_TLS_TPOFF 0000000000000010 c + 0
+
+## TP offset for a is at 0x2460
+# IE:      lgrl    %r1, 0x2460
+# IE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for b is at 0x2468
+# IE-NEXT: lgrl    %r1, 0x2468
+# IE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for c is at 0x2470
+# IE-NEXT: lgrl    %r1, 0x2470
+# IE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## Data element: TP offset for a is at 0x2460 (relocated via R_390_RELATIVE above)
+# IE-DATA: 3478 00000000 00000000
+
+# NOREL: no relocations
+
+## TP offset for a is at 0x1002250
+# LE:      lgrl    %r1, 0x1002250
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for b is at 0x1002258
+# LE-NEXT: lgrl    %r1, 0x1002258
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for c is at 0x1002260
+# LE-NEXT: lgrl    %r1, 0x1002260
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## Data element: TP offset for a is at 0x1002250
+# LE-DATA: 00000000 01002250
+
+## TP offsets in GOT:
+# a: -8
+# b: -4
+# c: 0
+# LE-GOT: 1002238 00000000 00000000 00000000 00000000
+# LE-GOT: 1002248 00000000 00000000 ffffffff fffffff8
+# LE-GOT: 1002258 ffffffff fffffffc 00000000 00000000
+
+# PIE-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries:
+# PIE-REL: 00000000000033d0 000000000000000c R_390_RELATIVE 23b8
+
+## TP offset for a is at 0x23b8
+# PIE:      lgrl    %r1, 0x23b8
+# PIE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for b is at 0x23c0
+# PIE-NEXT: lgrl    %r1, 0x23c0
+# PIE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for c is at 0x23c8
+# PIE-NEXT: lgrl    %r1, 0x23c8
+# PIE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## Data element: TP offset for a is at 0x23b8 (relocated via R_390_RELATIVE above)
+# PIE-DATA: 33d0 00000000 00000000
+
+## TP offsets in GOT:
+# a: -8
+# b: -4
+# c: 0
+# PIE-GOT: 23a0 00000000 000022d0 00000000 00000000
+# PIE-GOT: 23b0 00000000 00000000 ffffffff fffffff8
+# PIE-GOT: 23c0 ffffffff fffffffc 00000000 00000000
+
+ear     %r7,%a0
+sllg    %r7,%r1,32
+ear     %r7,%a1
+
+lgrl    %r1, a@indntpoff
+lgf     %r1,0(%r1,%r7)
+
+lgrl    %r1, b@indntpoff
+lgf     %r1,0(%r1,%r7)
+
+lgrl    %r1, c@indntpoff
+lgf     %r1,0(%r1,%r7)
+
+	.data
+	.reloc .,R_390_TLS_IE64,a
+	.space 8
+
+	.section .tbss
+	.globl a
+	.globl b
+	.globl c
+	.zero 8
+a:
+	.zero 4
+b:
+	.zero 4
+c:
diff --git a/lld/test/ELF/systemz-tls-ld.s b/lld/test/ELF/systemz-tls-ld.s
new file mode 100644
index 00000000000000..2cb36d7294f2b0
--- /dev/null
+++ b/lld/test/ELF/systemz-tls-ld.s
@@ -0,0 +1,114 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=LD-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=LD %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t.so | FileCheck --check-prefix=LD-DATA %s
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s
+
+# LD-REL: Relocation section '.rela.dyn' at offset {{.*}} contains 1 entries:
+# LD-REL: 00000000000024f8 0000000000000036 R_390_TLS_DTPMOD 0
+
+## _GLOBAL_OFFSET_TABLE is at 0x24e0
+# LD:      larl    %r12, 0x24e0
+
+## GOT offset of the LDM TLS module ID is at 0x23e0
+# LD-NEXT: lgrl    %r2, 0x23e0
+# LD-NEXT: brasl   %r14, 0x13c0
+# LD-NEXT: la      %r2, 0(%r2,%r7)
+
+## DTP offset for a is at 0x23e8
+# LD-NEXT: lgrl    %r1, 0x23e8
+# LD-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## DTP offset for b is at 0x23f0
+# LD-NEXT: lgrl    %r1, 0x23f0
+# LD-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## DTP offset for c is at 0x23f8
+# LD-NEXT: lgrl    %r1, 0x23f8
+# LD-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## Constant pool holding GOT offsets of TLS module ID and DTP offsets:
+# TLS module ID: 0x24f8 / 0x18
+# a: 8
+# b: 12
+# c: 16
+# LD-DATA: 23e0 00000000 00000018 00000000 00000008
+# LD-DATA: 23f0 00000000 0000000c 00000000 00000010
+
+# NOREL: no relocations
+
+## _GLOBAL_OFFSET_TABLE is at 0x1002230
+# LE:      larl    %r12, 0x1002230
+
+## GOT offset of the LDM TLS module ID is at 0x1002210
+# LE-NEXT: lgrl    %r2, 0x1002210
+# LE-NEXT: brcl    0,
+# LE-NEXT: la      %r2, 0(%r2,%r7)
+
+## TP offset for a is at 0x1002218
+# LE-NEXT: lgrl    %r1, 0x1002218
+# LE-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## TP offset for b is at 0x1002220
+# LE-NEXT: lgrl    %r1, 0x1002220
+# LE-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## TP offset for c is at 0x1002228
+# LE-NEXT: lgrl    %r1, 0x1002228
+# LE-NEXT: lgf     %r1, 0(%r1,%r2)
+
+## zeroed LDM / TP offsets:
+# LDM TLS: 0
+# a: -8
+# b: -4
+# c: 0
+# LE-DATA: 1002210 00000000 00000000 ffffffff fffffff8
+# LE-DATA: 1002220 ffffffff fffffffc 00000000 00000000
+
+
+ear     %r7,%a0
+sllg    %r7,%r1,32
+ear     %r7,%a1
+larl    %r12,_GLOBAL_OFFSET_TABLE_
+
+lgrl    %r2,.LC0
+brasl   %r14,__tls_get_offset@PLT:tls_ldcall:a
+la      %r2,0(%r2,%r7)
+
+lgrl    %r1, .LC1
+lgf     %r1,0(%r1,%r2)
+
+lgrl    %r1, .LC2
+lgf     %r1,0(%r1,%r2)
+
+lgrl    %r1, .LC3
+lgf     %r1,0(%r1,%r2)
+
+        .section        .data.rel.ro,"aw"
+        .align  8
+.LC0:
+        .quad   a@TLSLDM
+.LC1:
+        .quad   a@DTPOFF
+.LC2:
+        .quad   b@DTPOFF
+.LC3:
+        .quad   c@DTPOFF
+
+	.section .tbss
+	.globl a
+	.globl b
+	.globl c
+	.zero 8
+a:
+	.zero 4
+b:
+	.zero 4
+c:
diff --git a/lld/test/ELF/systemz-tls-le.s b/lld/test/ELF/systemz-tls-le.s
new file mode 100644
index 00000000000000..9e41fc768da391
--- /dev/null
+++ b/lld/test/ELF/systemz-tls-le.s
@@ -0,0 +1,61 @@
+# REQUIRES: systemz
+# RUN: llvm-mc -filetype=obj -triple=s390x-unknown-linux %s -o %t.o
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+# RUN: llvm-objdump --section .data.rel.ro --full-contents %t | FileCheck --check-prefix=LE-DATA %s
+
+# NOREL: no relocations
+
+## TP offset for a is at 0x1002200
+# LE:      lgrl    %r1, 0x1002200
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for b is at 0x1002208
+# LE-NEXT: lgrl    %r1, 0x1002208
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offset for c is at 0x1002210
+# LE-NEXT: lgrl    %r1, 0x1002210
+# LE-NEXT: lgf     %r1, 0(%r1,%r7)
+
+## TP offsets:
+# a: -8
+# b: -4
+# c: 0
+# LE-DATA: 1002200 ffffffff fffffff8 ffffffff fffffffc
+# LE-DATA: 1002210 00000000 00000000
+
+ear     %r7,%a0
+sllg    %r7,%r1,32
+ear     %r7,%a1
+
+lgrl    %r1, .LC0
+lgf     %r1,0(%r1,%r7)
+
+lgrl    %r1, .LC1
+lgf     %r1,0(%r1,%r7)
+
+lgrl    %r1, .LC2
+lgf     %r1,0(%r1,%r7)
+
+        .section        .data.rel.ro,"aw"
+        .align  8
+.LC0:
+        .quad   a@ntpoff
+.LC1:
+        .quad   b@ntpoff
+.LC2:
+        .quad   c@ntpoff
+
+	.section .tbss
+	.globl a
+	.globl b
+	.globl c
+	.zero 8
+a:
+	.zero 4
+b:
+	.zero 4
+c:
diff --git a/lld/test/MachO/invalid/invalid-lto-object-path.ll b/lld/test/MachO/invalid/invalid-lto-object-path.ll
index 75c6a97e446fb2..c862538d592ce8 100644
--- a/lld/test/MachO/invalid/invalid-lto-object-path.ll
+++ b/lld/test/MachO/invalid/invalid-lto-object-path.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 
 ;; Creating read-only directories with `chmod 400` isn't supported on Windows
 ; UNSUPPORTED: system-windows
diff --git a/lld/test/MachO/thinlto-emit-imports.ll b/lld/test/MachO/thinlto-emit-imports.ll
index 47a612bd0a7b56..88f766f59c8877 100644
--- a/lld/test/MachO/thinlto-emit-imports.ll
+++ b/lld/test/MachO/thinlto-emit-imports.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86, non-root-user
 ; RUN: rm -rf %t; split-file %s %t
 
 ; Generate summary sections and test lld handling.
diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test
index 559a32bfa242f8..057de2a22f6a0c 100644
--- a/lld/test/MinGW/driver.test
+++ b/lld/test/MinGW/driver.test
@@ -409,6 +409,13 @@ LTO_OPTS: -mllvm:-mcpu=x86-64 -opt:lldlto=2 -dwodir:foo -lto-cs-profile-generate
 RUN: ld.lld -### foo.o -m i386pep --lto-O2 --lto-CGO1 --lto-cs-profile-generate --lto-cs-profile-file=foo 2>&1 | FileCheck -check-prefix=LTO_OPTS2 %s
 LTO_OPTS2:-opt:lldlto=2 -opt:lldltocgo=1 -lto-cs-profile-generate -lto-cs-profile-file:foo
 
+RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s
+RUN: ld.lld -### foo.o -m i386pe --lto-emit-asm 2>&1 | FileCheck -check-prefix=LTO_EMIT_ASM %s
+LTO_EMIT_ASM: -lldemit:asm
+
+RUN: ld.lld -### foo.o -m i386pe -plugin-opt=emit-llvm 2>&1 | FileCheck -check-prefix=LTO_EMIT_LLVM %s
+LTO_EMIT_LLVM: -lldemit:llvm
+
 Test GCC specific LTO options that GCC passes unconditionally, that we ignore.
 
 RUN: ld.lld -### foo.o -m i386pep -plugin /usr/lib/gcc/x86_64-w64-mingw32/10-posix/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-w64-mingw32/10-posix/lto-wrapper -plugin-opt=-fresolution=/tmp/ccM9d4fP.res -plugin-opt=-pass-through=-lmingw32 2> /dev/null
diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py
index b3e07f1f823cc4..d309c2ad4ee284 100644
--- a/lld/test/lit.cfg.py
+++ b/lld/test/lit.cfg.py
@@ -83,6 +83,7 @@
                 "PowerPC": "ppc",
                 "RISCV": "riscv",
                 "Sparc": "sparc",
+                "SystemZ": "systemz",
                 "WebAssembly": "wasm",
                 "X86": "x86",
             },
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index c2fb77d5a371f4..44f2850b92d522 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -35,7 +35,7 @@ endif()
 
 if(NOT DEFINED LLVM_SHLIB_SYMBOL_VERSION)
   # "Symbol version prefix for libLLVM.so"
-  set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}")
+  set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}")
 endif()
 
 if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (MSVC_TOOLSET_VERSION LESS 142) AND (CMAKE_GENERATOR_TOOLSET STREQUAL ""))
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 5e989618552824..5fc663d10b8f77 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -108,7 +108,7 @@ function(add_llvm_symbol_exports target_name export_file)
       COMMAND "${Python3_EXECUTABLE}" "-c"
       "import sys; \
        lines = ['    ' + l.rstrip() for l in sys.stdin] + ['  local: *;']; \
-       print('LLVM_${LLVM_VERSION_MAJOR} {'); \
+       print('LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {'); \
        print('  global:') if len(lines) > 1 else None; \
        print(';\\n'.join(lines) + '\\n};')"
       < ${export_file} > ${native_export_file}
@@ -646,9 +646,9 @@ function(llvm_add_library name)
     if(UNIX AND NOT APPLE AND NOT ARG_SONAME)
       set_target_properties(${name}
         PROPERTIES
-        # Since 4.0.0, the ABI version is indicated by the major version
-        SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}
-        VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX})
+        # Since 18.1.0, the ABI version is indicated by the major and minor version.
+        SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}
+        VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX})
     endif()
   endif()
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 82f4a7a15c9c13..5b3210138f2f89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -164,6 +164,30 @@ Changes to the MIPS Backend
 Changes to the PowerPC Backend
 ------------------------------
 
+* LLJIT's JIT linker now defaults to JITLink on 64-bit ELFv2 targets.
+* Initial-exec TLS model is supported on AIX.
+* Implemented new resource based scheduling model of POWER7 and POWER8.
+* ``frexp`` libcall now references correct symbol name for ``fp128``.
+* Optimized materialization of 64-bit immediates, code generation of
+  ``vec_promote`` and atomics.
+* Global constant strings are pooled in the TOC under one entry to reduce the
+  number of entries in the TOC.
+* Added a number of missing Power10 extended mnemonics.
+* Added the SCV instruction.
+* Fixed register class for the paddi instruction.
+* Optimize VPERM and fix code order for swapping vector operands on LE.
+* Added various bug fixes and code gen improvements.
+
+AIX Support/improvements:
+
+* Support for a non-TOC-based access sequence for the local-exec TLS model (called small local-exec).
+* XCOFF toc-data peephole optimization and bug fixes.
+* Move less often used __ehinfo TOC entries to the end of the TOC section.
+* Fixed problems when the AIX libunwind unwinds starting from a signal handler
+  and the function that raised the signal happens to be a leaf function that
+  shares the stack frame with its caller or a leaf function that does not store
+  the stack frame backchain.
+
 Changes to the RISC-V Backend
 -----------------------------
 
@@ -317,6 +341,7 @@ Changes to the LLVM tools
 
 * llvm-symbolizer now treats invalid input as an address for which source
   information is not found.
+* Fixed big-endian support in llvm-symbolizer's DWARF location parser.
 * llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra
   information (section name) when showing symbols.
 
@@ -328,6 +353,13 @@ Changes to the LLVM tools
 * llvm-objcopy now supports ``--gap-fill`` and ``--pad-to`` options, for
   ELF input and binary output files only.
 
+* Supported parsing XCOFF auxiliary symbols in obj2yaml.
+
+* ``llvm-ranlib`` now supports ``-X`` on AIX to specify the type of object file
+  ranlib should examine.
+
+* ``llvm-nm`` now supports ``--export-symbol`` to ignore the import symbol file.
+
 * llvm-rc and llvm-windres now accept file path references in ``.rc`` files
   concatenated from multiple string literals.
 
@@ -387,6 +419,12 @@ Changes to Sanitizers
 ---------------------
 * HWASan now defaults to detecting use-after-scope bugs.
 
+Changes to the Profile Runtime
+------------------------------
+
+* Public header ``profile/instr_prof_interface.h`` is added to declare four
+  API functions to fine tune profile collection.
+
 Other Changes
 -------------
 
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index af3ad822e0b0de..0880f9c65aa45d 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1314,6 +1314,13 @@ class ScalarEvolution {
   void getPoisonGeneratingValues(SmallPtrSetImpl<const Value *> &Result,
                                  const SCEV *S);
 
+  /// Check whether it is poison-safe to represent the expression S using the
+  /// instruction I. If such a replacement is performed, the poison flags of
+  /// instructions in DropPoisonGeneratingInsts must be dropped.
+  bool canReuseInstruction(
+      const SCEV *S, Instruction *I,
+      SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts);
+
   class FoldID {
     const SCEV *Op = nullptr;
     const Type *Ty = nullptr;
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 81cdd39afc6bab..f17ba75e3efa6a 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1464,6 +1464,7 @@ enum {
   PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data.
   PT_OPENBSD_WXNEEDED = 0x65a3dbe7,  // Program does W^X violations.
   PT_OPENBSD_NOBTCFI = 0x65a3dbe8,   // Do not enforce branch target CFI.
+  PT_OPENBSD_SYSCALLS = 0x65a3dbe9,  // System call sites.
   PT_OPENBSD_BOOTDATA = 0x65a41be6,  // Section for boot arguments.
 
   // ARM program header types.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 2acb45837c480a..4b2db80bc1ec30 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4184,6 +4184,68 @@ void ScalarEvolution::getPoisonGeneratingValues(
     Result.insert(SU->getValue());
 }
 
+bool ScalarEvolution::canReuseInstruction(
+    const SCEV *S, Instruction *I,
+    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
+  // If the instruction cannot be poison, it's always safe to reuse.
+  if (programUndefinedIfPoison(I))
+    return true;
+
+  // Otherwise, it is possible that I is more poisonous that S. Collect the
+  // poison-contributors of S, and then check whether I has any additional
+  // poison-contributors. Poison that is contributed through poison-generating
+  // flags is handled by dropping those flags instead.
+  SmallPtrSet<const Value *, 8> PoisonVals;
+  getPoisonGeneratingValues(PoisonVals, S);
+
+  SmallVector<Value *> Worklist;
+  SmallPtrSet<Value *, 8> Visited;
+  Worklist.push_back(I);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    // Avoid walking large instruction graphs.
+    if (Visited.size() > 16)
+      return false;
+
+    // Either the value can't be poison, or the S would also be poison if it
+    // is.
+    if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
+      continue;
+
+    auto *I = dyn_cast<Instruction>(V);
+    if (!I)
+      return false;
+
+    // Disjoint or instructions are interpreted as adds by SCEV. However, we
+    // can't replace an arbitrary add with disjoint or, even if we drop the
+    // flag. We would need to convert the or into an add.
+    if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
+      if (PDI->isDisjoint())
+        return false;
+
+    // FIXME: Ignore vscale, even though it technically could be poison. Do this
+    // because SCEV currently assumes it can't be poison. Remove this special
+    // case once we proper model when vscale can be poison.
+    if (auto *II = dyn_cast<IntrinsicInst>(I);
+        II && II->getIntrinsicID() == Intrinsic::vscale)
+      continue;
+
+    if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
+      return false;
+
+    // If the instruction can't create poison, we can recurse to its operands.
+    if (I->hasPoisonGeneratingFlagsOrMetadata())
+      DropPoisonGeneratingInsts.push_back(I);
+
+    for (Value *Op : I->operands())
+      Worklist.push_back(Op);
+  }
+  return true;
+}
+
 const SCEV *
 ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
                                          SmallVectorImpl<const SCEV *> &Ops) {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5d6c3465a0c364..412115eb649c2f 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5083,8 +5083,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
         Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
 
     // All subnormal inputs should be in the normal range in the result type.
-    if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy))
+    if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
+      if (Known.KnownFPClasses & fcPosSubnormal)
+        Known.KnownFPClasses |= fcPosNormal;
+      if (Known.KnownFPClasses & fcNegSubnormal)
+        Known.KnownFPClasses |= fcNegNormal;
       Known.knownNot(fcSubnormal);
+    }
 
     // Sign bit of a nan isn't guaranteed.
     if (!Known.isKnownNeverNaN())
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2cf319109273..c0c22e36004f72 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -596,6 +596,8 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                     LostDebugLocObserver &LocObserver, MachineInstr *MI) {
   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
   const char *Name = TLI.getLibcallName(Libcall);
+  if (!Name)
+    return LegalizerHelper::UnableToLegalize;
   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
   return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 98d8a6d9409f25..3135ec73a99e76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9253,7 +9253,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
 
   // Transfer chain users from old loads to the new load.
   for (LoadSDNode *L : Loads)
-    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+    DAG.makeEquivalentMemoryOrdering(L, NewLoad);
 
   if (!NeedsBswap)
     return NewLoad;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2d6b5bc983e739..0287856560e91a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10718,6 +10718,14 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       parseConstraintCode(Constraint) != AArch64CC::Invalid)
     return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
 
+  if (Constraint == "{za}") {
+    return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
+  }
+
+  if (Constraint == "{zt0}") {
+    return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
+  }
+
   // Use the default implementation in TargetLowering to convert the register
   // constraint into a member of a register class.
   std::pair<unsigned, const TargetRegisterClass *> Res;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index f86e6947c9cdb0..48e1c1bc73022c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -507,6 +507,10 @@ bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
         MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16))
     return true;
 
+  // ZA/ZT0 registers are reserved but may be permitted in the clobber list.
+  if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0)
+    return true;
+
   return !isReservedReg(MF, PhysReg);
 }
 
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 3c673ae938fdec..36aab383da68d2 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -2920,6 +2920,11 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
         (Res.getSymA()->getSymbol().isELF() &&
          cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
              ELF::STB_LOCAL);
+    // For O32, "$"-prefixed symbols are recognized as temporary while
+    // .L-prefixed symbols are not (PrivateGlobalPrefix is "$"). Recognize ".L"
+    // manually.
+    if (ABI.IsO32() && Res.getSymA()->getSymbol().getName().starts_with(".L"))
+      IsLocalSym = true;
     bool UseXGOT = STI->hasFeature(Mips::FeatureXGOT) && !IsLocalSym;
 
     // The case where the result register is $25 is somewhat special. If the
@@ -6359,7 +6364,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
       return true;
 
     SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-    MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier);
+    MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
     // Otherwise create a symbol reference.
     const MCExpr *SymRef =
         MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 26ed74108ec36c..18a4223d481ef0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1635,7 +1635,8 @@ class BitPermutationSelector {
     default: break;
     case ISD::ROTL:
       if (isa<ConstantSDNode>(V.getOperand(1))) {
-        unsigned RotAmt = V.getConstantOperandVal(1);
+        assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
+        unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
 
         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
 
@@ -1648,15 +1649,20 @@ class BitPermutationSelector {
     case ISD::SHL:
     case PPCISD::SHL:
       if (isa<ConstantSDNode>(V.getOperand(1))) {
-        unsigned ShiftAmt = V.getConstantOperandVal(1);
+        // sld takes 7 bits, slw takes 6.
+        unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
 
         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
 
-        for (unsigned i = ShiftAmt; i < NumBits; ++i)
-          Bits[i] = LHSBits[i - ShiftAmt];
-
-        for (unsigned i = 0; i < ShiftAmt; ++i)
-          Bits[i] = ValueBit(ValueBit::ConstZero);
+        if (ShiftAmt >= NumBits) {
+          for (unsigned i = 0; i < NumBits; ++i)
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        } else {
+          for (unsigned i = ShiftAmt; i < NumBits; ++i)
+            Bits[i] = LHSBits[i - ShiftAmt];
+          for (unsigned i = 0; i < ShiftAmt; ++i)
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        }
 
         return std::make_pair(Interesting = true, &Bits);
       }
@@ -1664,15 +1670,20 @@ class BitPermutationSelector {
     case ISD::SRL:
     case PPCISD::SRL:
       if (isa<ConstantSDNode>(V.getOperand(1))) {
-        unsigned ShiftAmt = V.getConstantOperandVal(1);
+        // srd takes lowest 7 bits, srw takes 6.
+        unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
 
         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
 
-        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
-          Bits[i] = LHSBits[i + ShiftAmt];
-
-        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
-          Bits[i] = ValueBit(ValueBit::ConstZero);
+        if (ShiftAmt >= NumBits) {
+          for (unsigned i = 0; i < NumBits; ++i)
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        } else {
+          for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
+            Bits[i] = LHSBits[i + ShiftAmt];
+          for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        }
 
         return std::make_pair(Interesting = true, &Bits);
       }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7895d74f06d12a..80447d03c000b8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5033,60 +5033,56 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   MVT IndexContainerVT =
       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
 
-  // Base case for the recursion just below - handle the worst case
-  // single source permutation.  Note that all the splat variants
-  // are handled above.
-  if (V2.isUndef()) {
+  SDValue Gather;
+  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
+  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
+  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
+    Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
+                              Subtarget);
+  } else {
     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
-    SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
-    LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
-                                         Subtarget);
-    SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
-                                 DAG.getUNDEF(ContainerVT), TrueMask, VL);
-    return convertFromScalableVector(VT, Gather, DAG, Subtarget);
-  }
-
-  // Translate the gather index we computed above (and possibly swapped)
-  // back to a shuffle mask.  This step should disappear once we complete
-  // the migration to recursive design.
-  SmallVector<int> ShuffleMaskLHS;
-  ShuffleMaskLHS.reserve(GatherIndicesLHS.size());
-  for (SDValue GatherIndex : GatherIndicesLHS) {
-    if (GatherIndex.isUndef()) {
-      ShuffleMaskLHS.push_back(-1);
-      continue;
+    // If only one index is used, we can use a "splat" vrgather.
+    // TODO: We can splat the most-common index and fix-up any stragglers, if
+    // that's beneficial.
+    if (LHSIndexCounts.size() == 1) {
+      int SplatIndex = LHSIndexCounts.begin()->getFirst();
+      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+                           DAG.getConstant(SplatIndex, DL, XLenVT),
+                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
+    } else {
+      SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
+      LHSIndices =
+          convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
+
+      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
     }
-    auto *IdxC = cast<ConstantSDNode>(GatherIndex);
-    ShuffleMaskLHS.push_back(IdxC->getZExtValue());
   }
 
-  // Recursively invoke lowering for the LHS as if there were no RHS.
-  // This allows us to leverage all of our single source permute tricks.
-  SDValue Gather =
-    DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
-  Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget);
+  // If a second vector operand is used by this shuffle, blend it in with an
+  // additional vrgather.
+  if (!V2.isUndef()) {
+    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
 
-  // Blend in second vector source with an additional vrgather.
-  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+    MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+    SelectMask =
+        convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
 
-  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
-  SelectMask =
-    convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
-
-  // If only one index is used, we can use a "splat" vrgather.
-  // TODO: We can splat the most-common index and fix-up any stragglers, if
-  // that's beneficial.
-  if (RHSIndexCounts.size() == 1) {
-    int SplatIndex = RHSIndexCounts.begin()->getFirst();
-    Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
-                         DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
-                         SelectMask, VL);
-  } else {
-    SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
-    RHSIndices =
-      convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
-    Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
-                         SelectMask, VL);
+    // If only one index is used, we can use a "splat" vrgather.
+    // TODO: We can splat the most-common index and fix-up any stragglers, if
+    // that's beneficial.
+    if (RHSIndexCounts.size() == 1) {
+      int SplatIndex = RHSIndexCounts.begin()->getFirst();
+      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+                           DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
+                           SelectMask, VL);
+    } else {
+      SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
+      RHSIndices =
+          convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
+      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
+                           SelectMask, VL);
+    }
   }
 
   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
@@ -14658,8 +14654,8 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
     if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
         isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
-      uint64_t MaskVal = LHS.getConstantOperandVal(1);
-      if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal))
+      const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
+      if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
         return DAG.getSelect(DL, VT,
                              DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
                              False, True);
@@ -15565,7 +15561,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
           MGN->getMemOperand(), IndexType, MGN->getExtensionType());
 
     if (Index.getOpcode() == ISD::BUILD_VECTOR &&
-        MGN->getExtensionType() == ISD::NON_EXTLOAD) {
+        MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
       if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
           SimpleVID && SimpleVID->StepDenominator == 1) {
         const int64_t StepNumerator = SimpleVID->StepNumerator;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 592962cebe8973..d5b1ddfbeb3dc9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1229,7 +1229,8 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
     MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
     auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
       int64_t Imm;
-      return isLoadImm(&I, Imm) && Imm == C1;
+      return isLoadImm(&I, Imm) && Imm == C1 &&
+             I.getOperand(0).getReg().isVirtual();
     });
     if (DefC1 != E)
       return DefC1->getOperand(0).getReg();
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 7b103395652433..38a59e650f33c7 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -72,6 +72,20 @@ def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
 //==== Features added predmoninantly for LEON subtarget support
 include "LeonFeatures.td"
 
+//==== Register allocation tweaks needed by some low-level software
+foreach i = 1 ... 7  in
+    def FeatureReserveG#i : SubtargetFeature<"reserve-g"#i, "ReserveRegister["#i#" + SP::G0]", "true",
+                                             "Reserve G"#i#", making it unavailable as a GPR">;
+foreach i = 0 ... 5 in
+    def FeatureReserveO#i : SubtargetFeature<"reserve-o"#i, "ReserveRegister["#i#" + SP::O0]", "true",
+                                             "Reserve O"#i#", making it unavailable as a GPR">;
+foreach i = 0 ... 7 in
+    def FeatureReserveL#i : SubtargetFeature<"reserve-l"#i, "ReserveRegister["#i#" + SP::L0]", "true",
+                                             "Reserve L"#i#", making it unavailable as a GPR">;
+foreach i = 0 ... 5 in
+    def FeatureReserveI#i : SubtargetFeature<"reserve-i"#i, "ReserveRegister["#i#" + SP::I0]", "true",
+                                             "Reserve I"#i#", making it unavailable as a GPR">;
+
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 78bdf3ae9a84ba..bdefb0841a124b 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -13,6 +13,7 @@
 
 #include "SparcISelLowering.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "SparcMachineFunctionInfo.h"
 #include "SparcRegisterInfo.h"
 #include "SparcTargetMachine.h"
@@ -28,6 +29,7 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -729,6 +731,30 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
   return Chain;
 }
 
+// Check whether any of the argument registers are reserved
+static bool isAnyArgRegReserved(const SparcRegisterInfo *TRI,
+                                const MachineFunction &MF) {
+  // The register window design means that outgoing parameters at O*
+  // will appear in the callee as I*.
+  // Be conservative and check both sides of the register names.
+  bool Outgoing =
+      llvm::any_of(SP::GPROutgoingArgRegClass, [TRI, &MF](MCPhysReg r) {
+        return TRI->isReservedReg(MF, r);
+      });
+  bool Incoming =
+      llvm::any_of(SP::GPRIncomingArgRegClass, [TRI, &MF](MCPhysReg r) {
+        return TRI->isReservedReg(MF, r);
+      });
+  return Outgoing || Incoming;
+}
+
+static void emitReservedArgRegCallError(const MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  F.getContext().diagnose(DiagnosticInfoUnsupported{
+      F, ("SPARC doesn't support"
+          " function calls if any of the argument registers is reserved.")});
+}
+
 SDValue
 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                SmallVectorImpl<SDValue> &InVals) const {
@@ -805,6 +831,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
   bool &isTailCall                      = CLI.IsTailCall;
   CallingConv::ID CallConv              = CLI.CallConv;
   bool isVarArg                         = CLI.IsVarArg;
+  MachineFunction &MF = DAG.getMachineFunction();
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1055,6 +1082,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
       ((hasReturnsTwice)
            ? TRI->getRTCallPreservedMask(CallConv)
            : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
+
+  if (isAnyArgRegReserved(TRI, MF))
+    emitReservedArgRegCallError(MF);
+
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
 
@@ -1125,6 +1156,13 @@ Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT,
     .Case("g4", SP::G4).Case("g5", SP::G5).Case("g6", SP::G6).Case("g7", SP::G7)
     .Default(0);
 
+  // If we're directly referencing register names
+  // (e.g in GCC C extension `register int r asm("g1");`),
+  // make sure that said register is in the reserve list.
+  const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (!TRI->isReservedReg(MF, Reg))
+    Reg = 0;
+
   if (Reg)
     return Reg;
 
@@ -1189,6 +1227,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
   SDLoc DL = CLI.DL;
   SDValue Chain = CLI.Chain;
   auto PtrVT = getPointerTy(DAG.getDataLayout());
+  MachineFunction &MF = DAG.getMachineFunction();
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1372,6 +1411,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
       ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
                          : TRI->getCallPreservedMask(DAG.getMachineFunction(),
                                                      CLI.CallConv));
+
+  if (isAnyArgRegReserved(TRI, MF))
+    emitReservedArgRegCallError(MF);
+
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
 
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index f97bf57627d1aa..71a27f77d2c6bf 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -12,10 +12,8 @@
 
 #include "SparcRegisterInfo.h"
 #include "Sparc.h"
-#include "SparcMachineFunctionInfo.h"
 #include "SparcSubtarget.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -98,9 +96,21 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   for (unsigned n = 0; n < 31; n++)
     Reserved.set(SP::ASR1 + n);
 
+  for (TargetRegisterClass::iterator i = SP::IntRegsRegClass.begin();
+       i != SP::IntRegsRegClass.end(); ++i) {
+    if (MF.getSubtarget<SparcSubtarget>().isRegisterReserved(*i))
+      markSuperRegs(Reserved, *i);
+  }
+
+  assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
 
+bool SparcRegisterInfo::isReservedReg(const MachineFunction &MF,
+                                      MCRegister Reg) const {
+  return getReservedRegs(MF)[Reg];
+}
+
 const TargetRegisterClass*
 SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                       unsigned Kind) const {
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 5b3c1a7ad07dd5..58c85f33635f2d 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -30,6 +30,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
   const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
+  bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const;
 
   const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
                                                 unsigned Kind) const override;
diff --git a/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index d5ba7464695c5f..d8319a8d41dda0 100644
--- a/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -370,6 +370,10 @@ def LowQFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 7)>;
 // Floating point control register classes.
 def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>;
 
+// GPR argument registers.
+def GPROutgoingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "O%u", 0, 5)>;
+def GPRIncomingArg : RegisterClass<"SP", [i32, i64], 32, (sequence "I%u", 0, 5)>;
+
 let isAllocatable = 0 in {
   // Ancillary state registers
   // FIXME: TICK is special-cased here as it can be accessed
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 6b09904ca5e8e5..5b65e34e0f8a36 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -50,6 +50,7 @@ SparcSubtarget::SparcSubtarget(const StringRef &CPU, const StringRef &TuneCPU,
                                const StringRef &FS, const TargetMachine &TM,
                                bool is64Bit)
     : SparcGenSubtargetInfo(TM.getTargetTriple(), CPU, TuneCPU, FS),
+      ReserveRegister(TM.getMCRegisterInfo()->getNumRegs()),
       TargetTriple(TM.getTargetTriple()), Is64Bit(is64Bit),
       InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
       TLInfo(TM, *this), FrameLowering(*this) {}
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h b/llvm/lib/Target/Sparc/SparcSubtarget.h
index cdb210f67482c4..fe4aca5195306a 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -13,12 +13,14 @@
 #ifndef LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
 #define LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
 
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "SparcFrameLowering.h"
 #include "SparcISelLowering.h"
 #include "SparcInstrInfo.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/TargetParser/Triple.h"
 #include <string>
 
@@ -29,6 +31,10 @@ namespace llvm {
 class StringRef;
 
 class SparcSubtarget : public SparcGenSubtargetInfo {
+  // ReserveRegister[i] - Register #i is not available as a general purpose
+  // register.
+  BitVector ReserveRegister;
+
   Triple TargetTriple;
   virtual void anchor();
 
@@ -82,6 +88,10 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
     return is64Bit() ? 2047 : 0;
   }
 
+  bool isRegisterReserved(MCPhysReg PhysReg) const {
+    return ReserveRegister[PhysReg];
+  }
+
   /// Given a actual stack size as determined by FrameInfo, this function
   /// returns adjusted framesize which includes space for register window
   /// spills and arguments.
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 733f290b1bc93a..633fcb3314c42f 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -470,6 +470,9 @@ class LowerTypeTestsModule {
 
   Function *WeakInitializerFn = nullptr;
 
+  GlobalVariable *GlobalAnnotation;
+  DenseSet<Value *> FunctionAnnotations;
+
   bool shouldExportConstantsAsAbsoluteSymbols();
   uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
   TypeIdLowering importTypeId(StringRef TypeId);
@@ -531,6 +534,10 @@ class LowerTypeTestsModule {
   /// replace each use, which is a direct function call.
   void replaceDirectCalls(Value *Old, Value *New);
 
+  bool isFunctionAnnotation(Value *V) const {
+    return FunctionAnnotations.contains(V);
+  }
+
 public:
   LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
                        ModuleSummaryIndex *ExportSummary,
@@ -1377,8 +1384,11 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
   // (all?) targets. Switch to a runtime initializer.
   SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
   findGlobalVariableUsersOf(F, GlobalVarUsers);
-  for (auto *GV : GlobalVarUsers)
+  for (auto *GV : GlobalVarUsers) {
+    if (GV == GlobalAnnotation)
+      continue;
     moveInitializerToModuleConstructor(GV);
+  }
 
   // Can not RAUW F with an expression that uses F. Replace with a temporary
   // placeholder first.
@@ -1837,6 +1847,16 @@ LowerTypeTestsModule::LowerTypeTestsModule(
   }
   OS = TargetTriple.getOS();
   ObjectFormat = TargetTriple.getObjectFormat();
+
+  // Function annotation describes or applies to function itself, and
+  // shouldn't be associated with jump table thunk generated for CFI.
+  GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations");
+  if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) {
+    const ConstantArray *CA =
+        cast<ConstantArray>(GlobalAnnotation->getInitializer());
+    for (Value *Op : CA->operands())
+      FunctionAnnotations.insert(Op);
+  }
 }
 
 bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
@@ -1896,10 +1916,14 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
     if (isa<BlockAddress, NoCFIValue>(U.getUser()))
       continue;
 
-    // Skip direct calls to externally defined or non-dso_local functions
+    // Skip direct calls to externally defined or non-dso_local functions.
     if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
       continue;
 
+    // Skip function annotation.
+    if (isFunctionAnnotation(U.getUser()))
+      continue;
+
     // Must handle Constants specially, we cannot call replaceUsesOfWith on a
     // constant because they are uniqued.
     if (auto *C = dyn_cast<Constant>(U.getUser())) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 249f4a7710e046..5d207dcfd18dd4 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2594,10 +2594,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         Value *V;
         if ((has_single_bit(TyAllocSize) &&
              match(GEP.getOperand(1),
-                   m_Exact(m_AShr(m_Value(V),
-                                  m_SpecificInt(countr_zero(TyAllocSize)))))) ||
+                   m_Exact(m_Shr(m_Value(V),
+                                 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
             match(GEP.getOperand(1),
-                  m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
+                  m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
           GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
               Builder.getInt8Ty(), GEP.getPointerOperand(), V);
           NewGEP->setIsInBounds(GEP.isInBounds());
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index a1d7f0f9ba0f74..a3951fdf8a1589 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1366,61 +1366,6 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
   return V;
 }
 
-static bool
-canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I,
-                    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
-  // If the instruction cannot be poison, it's always safe to reuse.
-  if (programUndefinedIfPoison(I))
-    return true;
-
-  // Otherwise, it is possible that I is more poisonous that S. Collect the
-  // poison-contributors of S, and then check whether I has any additional
-  // poison-contributors. Poison that is contributed through poison-generating
-  // flags is handled by dropping those flags instead.
-  SmallPtrSet<const Value *, 8> PoisonVals;
-  SE.getPoisonGeneratingValues(PoisonVals, S);
-
-  SmallVector<Value *> Worklist;
-  SmallPtrSet<Value *, 8> Visited;
-  Worklist.push_back(I);
-  while (!Worklist.empty()) {
-    Value *V = Worklist.pop_back_val();
-    if (!Visited.insert(V).second)
-      continue;
-
-    // Avoid walking large instruction graphs.
-    if (Visited.size() > 16)
-      return false;
-
-    // Either the value can't be poison, or the S would also be poison if it
-    // is.
-    if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
-      continue;
-
-    auto *I = dyn_cast<Instruction>(V);
-    if (!I)
-      return false;
-
-    // FIXME: Ignore vscale, even though it technically could be poison. Do this
-    // because SCEV currently assumes it can't be poison. Remove this special
-    // case once we proper model when vscale can be poison.
-    if (auto *II = dyn_cast<IntrinsicInst>(I);
-        II && II->getIntrinsicID() == Intrinsic::vscale)
-      continue;
-
-    if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
-      return false;
-
-    // If the instruction can't create poison, we can recurse to its operands.
-    if (I->hasPoisonGeneratingFlagsOrMetadata())
-      DropPoisonGeneratingInsts.push_back(I);
-
-    for (Value *Op : I->operands())
-      Worklist.push_back(Op);
-  }
-  return true;
-}
-
 Value *SCEVExpander::FindValueInExprValueMap(
     const SCEV *S, const Instruction *InsertPt,
     SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
@@ -1448,7 +1393,7 @@ Value *SCEVExpander::FindValueInExprValueMap(
       continue;
 
     // Make sure reusing the instruction is poison-safe.
-    if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts))
+    if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
       return V;
     DropPoisonGeneratingInsts.clear();
   }
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 0ed3324a27b6c9..1b142f14d81139 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -713,8 +714,11 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
                                            Instruction *IVOperand) {
   if (!SE->isSCEVable(UseInst->getType()) ||
-      (UseInst->getType() != IVOperand->getType()) ||
-      (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+      UseInst->getType() != IVOperand->getType())
+    return false;
+
+  const SCEV *UseSCEV = SE->getSCEV(UseInst);
+  if (UseSCEV != SE->getSCEV(IVOperand))
     return false;
 
   // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the
@@ -742,6 +746,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
   if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))
     return false;
 
+  // Make sure the operand is not more poisonous than the instruction.
+  if (!impliesPoison(IVOperand, UseInst)) {
+    SmallVector<Instruction *> DropPoisonGeneratingInsts;
+    if (!SE->canReuseInstruction(UseSCEV, IVOperand, DropPoisonGeneratingInsts))
+      return false;
+
+    for (Instruction *I : DropPoisonGeneratingInsts)
+      I->dropPoisonGeneratingFlagsAndMetadata();
+  }
+
   LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
 
   SE->forgetValue(UseInst);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 601d2454c1e163..0a9e2c7f49f55f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10215,8 +10215,18 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
       UniqueBases.insert(VecBase);
       // If the only one use is vectorized - can delete the extractelement
       // itself.
-      if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
-            return !R.ScalarToTreeEntry.count(U);
+      if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) ||
+          any_of(EI->users(), [&](User *U) {
+            const TreeEntry *UTE = R.getTreeEntry(U);
+            return !UTE || R.MultiNodeScalars.contains(U) ||
+                   count_if(R.VectorizableTree,
+                            [&](const std::unique_ptr<TreeEntry> &TE) {
+                              return any_of(TE->UserTreeIndices,
+                                            [&](const EdgeInfo &Edge) {
+                                              return Edge.UserTE == UTE;
+                                            }) &&
+                                     is_contained(TE->Scalars, EI);
+                            }) != 1;
           }))
         continue;
       R.eraseInstruction(EI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
new file mode 100644
index 00000000000000..a8cba7dc9a91e9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-za-clobber.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu -stop-after=aarch64-isel < %s -o - | FileCheck %s
+
+define void @alpha(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $za
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{za}"()
+  ret void
+}
+
+define void @beta(<vscale x 4 x i32> %x) local_unnamed_addr {
+entry:
+; CHECK: INLINEASM &"movt zt0[3, mul vl], z0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $zt0
+  tail call void asm sideeffect "movt zt0[3, mul vl], z0", "~{zt0}"()
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
new file mode 100644
index 00000000000000..3eb74f8394ec4e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-pc-windows-msvc19 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s
+
+define double @powi_f64(double %a, i32 %b) {
+; CHECK-LABEL: powi_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d1, w0
+; CHECK-NEXT:    b pow
+entry:
+  %c = call double @llvm.powi.f64.i32(double %a, i32 %b)
+  ret double %c
+}
+
+define float @powi_f32(float %a, i32 %b) {
+; CHECK-LABEL: powi_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s1, w0
+; CHECK-NEXT:    b powf
+entry:
+  %c = call float @llvm.powi.f32.i32(float %a, i32 %b)
+  ret float %c
+}
+
+define half @powi_f16(half %a, i32 %b) {
+; CHECK-LABEL: powi_f16:
+; CHECK:       .seh_proc powi_f16
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg_x x30, 16
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    scvtf s1, w0
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    .seh_startepilogue
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg_x x30, 16
+; CHECK-NEXT:    .seh_endepilogue
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .seh_endfunclet
+; CHECK-NEXT:    .seh_endproc
+entry:
+  %c = call half @llvm.powi.f16.i32(half %a, i32 %b)
+  ret half %c
+}
+
+define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
+; CHECK-LABEL: powi_v2f64:
+; CHECK:       .seh_proc powi_v2f64
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    .seh_stackalloc 48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg x30, 32
+; CHECK-NEXT:    str d8, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_freg d8, 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    scvtf d8, w0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    fmov d1, d8
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    fmov d1, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    .seh_startepilogue
+; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_freg d8, 40
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg x30, 32
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    .seh_stackalloc 48
+; CHECK-NEXT:    .seh_endepilogue
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .seh_endfunclet
+; CHECK-NEXT:    .seh_endproc
+entry:
+  %c = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %a, i32 %b)
+  ret <2 x double> %c
+}
+
+define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
+; CHECK-LABEL: powi_v2f32:
+; CHECK:       .seh_proc powi_v2f32
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    .seh_stackalloc 48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg x30, 32
+; CHECK-NEXT:    str d8, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_freg d8, 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    scvtf s8, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    fmov s1, s8
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    fmov s1, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    .seh_startepilogue
+; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_freg d8, 40
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg x30, 32
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    .seh_stackalloc 48
+; CHECK-NEXT:    .seh_endepilogue
+; CHECK-NEXT:    ret
+; CHECK-NEXT:    .seh_endfunclet
+; CHECK-NEXT:    .seh_endproc
+entry:
+  %c = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 %b)
+  ret <2 x float> %c
+}
+
+declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)
+declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32)
+declare double @llvm.powi.f64.i32(double, i32)
+declare float @llvm.powi.f32.i32(float, i32)
+declare half @llvm.powi.f16.i32(half, i32)
diff --git a/llvm/test/CodeGen/Mips/hf1_body.ll b/llvm/test/CodeGen/Mips/hf1_body.ll
index 184ea31bddc9d2..c3dea67896210a 100644
--- a/llvm/test/CodeGen/Mips/hf1_body.ll
+++ b/llvm/test/CodeGen/Mips/hf1_body.ll
@@ -23,8 +23,8 @@ entry:
 ; ALL:       .set reorder
 ; ALL:       .reloc 0, R_MIPS_NONE, v_sf
 ; GAS:       la $25, $__fn_local_v_sf
-; IAS:       lw $25, %got($$__fn_local_v_sf)($gp)
-; IAS:       addiu $25, $25, %lo($$__fn_local_v_sf)
+; IAS:       lw $25, %got($__fn_local_v_sf)($gp)
+; IAS:       addiu $25, $25, %lo($__fn_local_v_sf)
 ; ALL:       mfc1 $4, $f12
 ; ALL:       jr $25
 ; ALL:       .end __fn_stub_v_sf
diff --git a/llvm/test/CodeGen/PowerPC/pr59074.ll b/llvm/test/CodeGen/PowerPC/pr59074.ll
new file mode 100644
index 00000000000000..3e328c6ad9f0ba
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr59074.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64
+; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32
+; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64
+; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32
+
+; To verify this doesn't crash due to array out of bound.
+define void @pr59074(ptr %0) {
+; LE64-LABEL: pr59074:
+; LE64:       # %bb.0: # %entry
+; LE64-NEXT:    lwz 6, 0(3)
+; LE64-NEXT:    li 7, 12
+; LE64-NEXT:    ld 4, 16(3)
+; LE64-NEXT:    ld 5, 24(3)
+; LE64-NEXT:    addi 6, 6, -12
+; LE64-NEXT:    std 4, 16(3)
+; LE64-NEXT:    std 5, 24(3)
+; LE64-NEXT:    srd 6, 7, 6
+; LE64-NEXT:    li 7, 0
+; LE64-NEXT:    std 7, 8(3)
+; LE64-NEXT:    std 6, 0(3)
+; LE64-NEXT:    blr
+;
+; LE32-LABEL: pr59074:
+; LE32:       # %bb.0: # %entry
+; LE32-NEXT:    stwu 1, -80(1)
+; LE32-NEXT:    .cfi_def_cfa_offset 80
+; LE32-NEXT:    lwz 4, 0(3)
+; LE32-NEXT:    xxlxor 0, 0, 0
+; LE32-NEXT:    li 5, 4
+; LE32-NEXT:    addi 6, 1, 16
+; LE32-NEXT:    li 7, 0
+; LE32-NEXT:    li 8, 12
+; LE32-NEXT:    xxswapd 0, 0
+; LE32-NEXT:    addi 4, 4, -12
+; LE32-NEXT:    rlwinm 9, 4, 29, 28, 31
+; LE32-NEXT:    stxvd2x 0, 6, 5
+; LE32-NEXT:    stw 7, 44(1)
+; LE32-NEXT:    stw 7, 40(1)
+; LE32-NEXT:    stw 7, 36(1)
+; LE32-NEXT:    stw 8, 16(1)
+; LE32-NEXT:    lwzux 5, 9, 6
+; LE32-NEXT:    li 6, 7
+; LE32-NEXT:    lwz 7, 8(9)
+; LE32-NEXT:    nand 6, 4, 6
+; LE32-NEXT:    lwz 8, 4(9)
+; LE32-NEXT:    clrlwi 4, 4, 29
+; LE32-NEXT:    lwz 9, 12(9)
+; LE32-NEXT:    clrlwi 6, 6, 27
+; LE32-NEXT:    subfic 11, 4, 32
+; LE32-NEXT:    srw 5, 5, 4
+; LE32-NEXT:    slwi 10, 7, 1
+; LE32-NEXT:    srw 7, 7, 4
+; LE32-NEXT:    slw 6, 10, 6
+; LE32-NEXT:    srw 10, 8, 4
+; LE32-NEXT:    slw 8, 8, 11
+; LE32-NEXT:    slw 11, 9, 11
+; LE32-NEXT:    srw 4, 9, 4
+; LE32-NEXT:    or 5, 8, 5
+; LE32-NEXT:    or 7, 11, 7
+; LE32-NEXT:    or 6, 10, 6
+; LE32-NEXT:    stw 4, 12(3)
+; LE32-NEXT:    stw 7, 8(3)
+; LE32-NEXT:    stw 5, 0(3)
+; LE32-NEXT:    stw 6, 4(3)
+; LE32-NEXT:    addi 1, 1, 80
+; LE32-NEXT:    blr
+;
+; BE64-LABEL: pr59074:
+; BE64:       # %bb.0: # %entry
+; BE64-NEXT:    lwz 6, 12(3)
+; BE64-NEXT:    li 7, 12
+; BE64-NEXT:    ld 4, 24(3)
+; BE64-NEXT:    ld 5, 16(3)
+; BE64-NEXT:    addi 6, 6, -12
+; BE64-NEXT:    std 4, 24(3)
+; BE64-NEXT:    std 5, 16(3)
+; BE64-NEXT:    srd 6, 7, 6
+; BE64-NEXT:    li 7, 0
+; BE64-NEXT:    std 7, 0(3)
+; BE64-NEXT:    std 6, 8(3)
+; BE64-NEXT:    blr
+;
+; BE32-LABEL: pr59074:
+; BE32:       # %bb.0: # %entry
+; BE32-NEXT:    lwz 4, 12(3)
+; BE32-NEXT:    xxlxor 0, 0, 0
+; BE32-NEXT:    addi 5, 1, -64
+; BE32-NEXT:    li 6, 12
+; BE32-NEXT:    li 7, 0
+; BE32-NEXT:    addi 8, 1, -48
+; BE32-NEXT:    li 10, 7
+; BE32-NEXT:    stxvw4x 0, 0, 5
+; BE32-NEXT:    addi 4, 4, -12
+; BE32-NEXT:    stw 6, -36(1)
+; BE32-NEXT:    stw 7, -40(1)
+; BE32-NEXT:    stw 7, -44(1)
+; BE32-NEXT:    rlwinm 9, 4, 29, 28, 31
+; BE32-NEXT:    stw 7, -48(1)
+; BE32-NEXT:    sub 5, 8, 9
+; BE32-NEXT:    nand 6, 4, 10
+; BE32-NEXT:    clrlwi 4, 4, 29
+; BE32-NEXT:    clrlwi 6, 6, 27
+; BE32-NEXT:    lwz 7, 4(5)
+; BE32-NEXT:    lwz 8, 8(5)
+; BE32-NEXT:    lwz 9, 0(5)
+; BE32-NEXT:    lwz 5, 12(5)
+; BE32-NEXT:    slwi 10, 7, 1
+; BE32-NEXT:    srw 11, 8, 4
+; BE32-NEXT:    srw 7, 7, 4
+; BE32-NEXT:    srw 5, 5, 4
+; BE32-NEXT:    slw 6, 10, 6
+; BE32-NEXT:    subfic 10, 4, 32
+; BE32-NEXT:    srw 4, 9, 4
+; BE32-NEXT:    slw 8, 8, 10
+; BE32-NEXT:    slw 10, 9, 10
+; BE32-NEXT:    or 6, 11, 6
+; BE32-NEXT:    or 7, 10, 7
+; BE32-NEXT:    or 5, 8, 5
+; BE32-NEXT:    stw 4, 0(3)
+; BE32-NEXT:    stw 6, 8(3)
+; BE32-NEXT:    stw 5, 12(3)
+; BE32-NEXT:    stw 7, 4(3)
+; BE32-NEXT:    blr
+entry:
+  %v1 = load <2 x i128>, <2 x i128>* %0
+  %v2 = insertelement <2 x i128> %v1, i128 12, i32 0
+  %v3 = sub <2 x i128> %v1, %v2
+  %v4 = lshr <2 x i128> %v2, %v3
+  store <2 x i128> %v4, <2 x i128>* %0
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/branch-opt.mir b/llvm/test/CodeGen/RISCV/branch-opt.mir
new file mode 100644
index 00000000000000..ba3a20f2fbfcd3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/branch-opt.mir
@@ -0,0 +1,68 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc %s -mtriple=riscv64 -run-pass=peephole-opt -o - | FileCheck %s
+
+# Make sure we shouldn't replace the %2 ADDI with the $x10 ADDI since it has a
+# physical register destination.
+
+--- |
+  define void @foo(i32 signext %0) {
+    tail call void @bar(i32 1)
+    %2 = icmp ugt i32 %0, 1
+    br i1 %2, label %3, label %4
+
+  3:                                                ; preds = %1
+    tail call void @bar(i32 3)
+    ret void
+
+  4:                                                ; preds = %1
+    ret void
+  }
+
+  declare void @bar(...)
+
+...
+---
+name:            foo
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.0 (%ir-block.1):
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+  ; CHECK-NEXT:   $x10 = ADDI $x0, 1
+  ; CHECK-NEXT:   PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+  ; CHECK-NEXT:   [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 2
+  ; CHECK-NEXT:   BLTU [[COPY]], killed [[ADDI]], %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.3):
+  ; CHECK-NEXT:   $x10 = ADDI $x0, 3
+  ; CHECK-NEXT:   PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2 (%ir-block.4):
+  ; CHECK-NEXT:   PseudoRET
+  bb.0 (%ir-block.1):
+    successors: %bb.1, %bb.2
+    liveins: $x10
+
+    %0:gpr = COPY $x10
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    $x10 = ADDI $x0, 1
+    PseudoCALL target-flags(riscv-call) @bar, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    %2:gpr = ADDI $x0, 2
+    BLTU %0, killed %2, %bb.2
+    PseudoBR %bb.1
+
+  bb.1 (%ir-block.3):
+    $x10 = ADDI $x0, 3
+    PseudoTAIL target-flags(riscv-call) @bar, implicit $x2, implicit $x10
+
+  bb.2 (%ir-block.4):
+    PseudoRET
+
+...
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index 8e53782b5dcd78..101cb5aeeb0940 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -3719,3 +3719,54 @@ entry:
   %cond = select i1 %tobool.not, i64 0, i64 %x
   ret i64 %cond
 }
+
+; Test that we don't crash on types larger than 64 bits.
+define i64 @single_bit3(i80 %x, i64 %y) {
+; RV32I-LABEL: single_bit3:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lw a0, 8(a0)
+; RV32I-NEXT:    slli a0, a0, 31
+; RV32I-NEXT:    srai a3, a0, 31
+; RV32I-NEXT:    and a0, a3, a1
+; RV32I-NEXT:    and a1, a3, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: single_bit3:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    slli a1, a1, 63
+; RV64I-NEXT:    srai a0, a1, 63
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    ret
+;
+; RV64XVENTANACONDOPS-LABEL: single_bit3:
+; RV64XVENTANACONDOPS:       # %bb.0: # %entry
+; RV64XVENTANACONDOPS-NEXT:    andi a1, a1, 1
+; RV64XVENTANACONDOPS-NEXT:    vt.maskc a0, a2, a1
+; RV64XVENTANACONDOPS-NEXT:    ret
+;
+; RV64XTHEADCONDMOV-LABEL: single_bit3:
+; RV64XTHEADCONDMOV:       # %bb.0: # %entry
+; RV64XTHEADCONDMOV-NEXT:    slli a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT:    srai a0, a1, 63
+; RV64XTHEADCONDMOV-NEXT:    and a0, a0, a2
+; RV64XTHEADCONDMOV-NEXT:    ret
+;
+; RV32ZICOND-LABEL: single_bit3:
+; RV32ZICOND:       # %bb.0: # %entry
+; RV32ZICOND-NEXT:    lw a0, 8(a0)
+; RV32ZICOND-NEXT:    andi a3, a0, 1
+; RV32ZICOND-NEXT:    czero.eqz a0, a1, a3
+; RV32ZICOND-NEXT:    czero.eqz a1, a2, a3
+; RV32ZICOND-NEXT:    ret
+;
+; RV64ZICOND-LABEL: single_bit3:
+; RV64ZICOND:       # %bb.0: # %entry
+; RV64ZICOND-NEXT:    andi a1, a1, 1
+; RV64ZICOND-NEXT:    czero.eqz a0, a2, a1
+; RV64ZICOND-NEXT:    ret
+entry:
+  %and = and i80 %x, 18446744073709551616 ; 1 << 64
+  %tobool.not = icmp eq i80 %and, 0
+  %cond = select i1 %tobool.not, i64 0, i64 %y
+  ret i64 %cond
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index dab530751ef96b..799aebcaa63026 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -238,26 +238,39 @@ define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
 define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
 ; V128-LABEL: interleave_v32f32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; V128-NEXT:    vslidedown.vi v0, v8, 16
-; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; V128-NEXT:    vwaddu.vv v24, v0, v8
-; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v24, a0, v8
-; V128-NEXT:    lui a1, %hi(.LCPI10_0)
-; V128-NEXT:    addi a1, a1, %lo(.LCPI10_0)
-; V128-NEXT:    li a2, 32
-; V128-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; V128-NEXT:    vle16.v v12, (a1)
-; V128-NEXT:    lui a1, 699051
-; V128-NEXT:    addi a1, a1, -1366
-; V128-NEXT:    vmv.s.x v0, a1
+; V128-NEXT:    addi sp, sp, -16
+; V128-NEXT:    .cfi_def_cfa_offset 16
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    sub sp, sp, a0
+; V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; V128-NEXT:    li a1, 32
+; V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT:    vle16.v v4, (a0)
+; V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; V128-NEXT:    vle16.v v24, (a0)
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT:    lui a0, 699051
+; V128-NEXT:    addi a0, a0, -1366
+; V128-NEXT:    vmv.s.x v0, a0
+; V128-NEXT:    vrgatherei16.vv v24, v8, v4
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
 ; V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
 ; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v0, v8, v16
+; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v0, a0, v16
 ; V128-NEXT:    vmv8r.v v8, v0
 ; V128-NEXT:    vmv8r.v v16, v24
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    add sp, sp, a0
+; V128-NEXT:    addi sp, sp, 16
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v32f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 9e21cc9e3d624a..e1bd16649eede7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -188,30 +188,24 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
 ; V128-LABEL: interleave_v4i32_offset_1:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; V128-NEXT:    vwaddu.vv v10, v8, v8
-; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v10, a0, v8
 ; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; V128-NEXT:    vid.v v8
-; V128-NEXT:    vsrl.vi v8, v8, 1
+; V128-NEXT:    vid.v v10
+; V128-NEXT:    vsrl.vi v11, v10, 1
+; V128-NEXT:    vrgather.vv v10, v8, v11
 ; V128-NEXT:    vmv.v.i v0, 10
-; V128-NEXT:    vadd.vi v8, v8, 1
+; V128-NEXT:    vadd.vi v8, v11, 1
 ; V128-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; V128-NEXT:    vmv.v.v v8, v10
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v4i32_offset_1:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; V512-NEXT:    vwaddu.vv v10, v8, v8
-; V512-NEXT:    li a0, -1
-; V512-NEXT:    vwmaccu.vx v10, a0, v8
 ; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
-; V512-NEXT:    vid.v v8
-; V512-NEXT:    vsrl.vi v8, v8, 1
+; V512-NEXT:    vid.v v10
+; V512-NEXT:    vsrl.vi v11, v10, 1
+; V512-NEXT:    vrgather.vv v10, v8, v11
 ; V512-NEXT:    vmv.v.i v0, 10
-; V512-NEXT:    vadd.vi v8, v8, 1
+; V512-NEXT:    vadd.vi v8, v11, 1
 ; V512-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; V512-NEXT:    vmv1r.v v8, v10
 ; V512-NEXT:    ret
@@ -403,26 +397,39 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
 define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
 ; V128-LABEL: interleave_v32i32:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; V128-NEXT:    vslidedown.vi v0, v8, 16
-; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; V128-NEXT:    vwaddu.vv v24, v0, v8
-; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v24, a0, v8
-; V128-NEXT:    lui a1, %hi(.LCPI17_0)
-; V128-NEXT:    addi a1, a1, %lo(.LCPI17_0)
-; V128-NEXT:    li a2, 32
-; V128-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; V128-NEXT:    vle16.v v12, (a1)
-; V128-NEXT:    lui a1, 699051
-; V128-NEXT:    addi a1, a1, -1366
-; V128-NEXT:    vmv.s.x v0, a1
+; V128-NEXT:    addi sp, sp, -16
+; V128-NEXT:    .cfi_def_cfa_offset 16
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    sub sp, sp, a0
+; V128-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; V128-NEXT:    lui a0, %hi(.LCPI17_0)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; V128-NEXT:    li a1, 32
+; V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; V128-NEXT:    vle16.v v4, (a0)
+; V128-NEXT:    lui a0, %hi(.LCPI17_1)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI17_1)
+; V128-NEXT:    vle16.v v24, (a0)
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vs4r.v v24, (a0) # Unknown-size Folded Spill
+; V128-NEXT:    lui a0, 699051
+; V128-NEXT:    addi a0, a0, -1366
+; V128-NEXT:    vmv.s.x v0, a0
+; V128-NEXT:    vrgatherei16.vv v24, v8, v4
+; V128-NEXT:    addi a0, sp, 16
+; V128-NEXT:    vl4r.v v12, (a0) # Unknown-size Folded Reload
 ; V128-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
 ; V128-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; V128-NEXT:    vwaddu.vv v0, v8, v16
+; V128-NEXT:    li a0, -1
 ; V128-NEXT:    vwmaccu.vx v0, a0, v16
 ; V128-NEXT:    vmv8r.v v8, v0
 ; V128-NEXT:    vmv8r.v v16, v24
+; V128-NEXT:    csrr a0, vlenb
+; V128-NEXT:    slli a0, a0, 2
+; V128-NEXT:    add sp, sp, a0
+; V128-NEXT:    addi sp, sp, 16
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v32i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index a26a87a1f3c139..a56a81f5f793bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -612,11 +612,13 @@ define <8 x i8> @concat_4xi8_start_undef_at_start(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: concat_4xi8_start_undef_at_start:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 224
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v10, v10, -4
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -4
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 9, i32 10, i32 11>
   ret <8 x i8> %res
@@ -626,11 +628,13 @@ define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: merge_start_into_end_non_contiguous:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 144
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v10, v10, -4
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -4
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 11>
   ret <8 x i8> %res
@@ -671,11 +675,13 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: merge_slidedown:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    li a0, 195
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
   ret <8 x i8> %res
@@ -686,12 +692,14 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
 ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 2
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    li a0, 234
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
   ret <8 x i8> %res
@@ -702,13 +710,16 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: unmergable:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vadd.vi v11, v10, 2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; CHECK-NEXT:    vle8.v v10, (a0)
+; CHECK-NEXT:    vle8.v v12, (a0)
 ; CHECK-NEXT:    li a0, 234
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vrgather.vv v10, v9, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
   ret <8 x i8> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index f889041647b235..eeb8e517d01d2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -8,51 +8,23 @@
 
 ; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
-; RV32-LABEL: load_factor2_v3:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    vle32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v10, 2
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vwaddu.vv v8, v10, v9
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    vwmaccu.vx v8, a0, v9
-; RV32-NEXT:    vmv.v.i v0, 4
-; RV32-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v10, 4
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    vrgather.vi v8, v12, 0, v0.t
-; RV32-NEXT:    vid.v v9
-; RV32-NEXT:    vadd.vv v9, v9, v9
-; RV32-NEXT:    vadd.vi v11, v9, 1
-; RV32-NEXT:    vrgather.vv v9, v10, v11
-; RV32-NEXT:    vrgather.vi v9, v12, 1, v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: load_factor2_v3:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    vle32.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vadd.vi v8, v8, 1
-; RV64-NEXT:    vrgather.vv v9, v10, v8
-; RV64-NEXT:    vmv.v.i v0, 4
-; RV64-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v10, 4
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    vrgather.vi v9, v12, 1, v0.t
-; RV64-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v11, v10, 2
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vwaddu.vv v8, v10, v11
-; RV64-NEXT:    li a0, -1
-; RV64-NEXT:    vwmaccu.vx v8, a0, v11
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    vrgather.vi v8, v12, 0, v0.t
-; RV64-NEXT:    ret
+; CHECK-LABEL: load_factor2_v3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vv v9, v8, v8
+; CHECK-NEXT:    vrgather.vv v8, v10, v9
+; CHECK-NEXT:    vmv.v.i v0, 4
+; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v12, v10, 4
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vrgather.vi v8, v12, 0, v0.t
+; CHECK-NEXT:    vadd.vi v11, v9, 1
+; CHECK-NEXT:    vrgather.vv v9, v10, v11
+; CHECK-NEXT:    vrgather.vi v9, v12, 1, v0.t
+; CHECK-NEXT:    ret
   %interleaved.vec = load <6 x i32>, ptr %ptr
   %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
   %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
@@ -159,142 +131,163 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 58
+; RV32-NEXT:    li a3, 62
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    sub sp, sp, a2
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb
-; RV32-NEXT:    addi a3, a1, 256
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 62 * vlenb
+; RV32-NEXT:    addi a3, a1, 128
+; RV32-NEXT:    addi a4, a1, 256
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT:    vle32.v v8, (a3)
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 25
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, a1, 128
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vslideup.vi v16, v8, 4
+; RV32-NEXT:    vle32.v v16, (a4)
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 12
+; RV32-NEXT:    li a5, 29
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs4r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vid.v v20
-; RV32-NEXT:    vadd.vi v4, v20, -10
-; RV32-NEXT:    vmv.v.v v2, v20
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT:    vid.v v10
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a5, a4, 4
+; RV32-NEXT:    slli a5, a4, 3
 ; RV32-NEXT:    add a4, a5, a4
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs2r.v v20, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vs2r.v v10, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vi v8, v10, -4
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    li a5, 13
+; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vrgatherei16.vv v12, v16, v8
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    li a5, 21
+; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs4r.v v12, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vadd.vi v8, v10, -10
 ; RV32-NEXT:    lui a4, 12
-; RV32-NEXT:    vmv.s.x v1, a4
-; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 16
+; RV32-NEXT:    vmv.s.x v0, a4
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a5, a4, 5
-; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    slli a4, a4, 3
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v16, 16
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    li a5, 45
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs1r.v v1, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vrgatherei16.vv v16, v8, v4, v0.t
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 21
+; RV32-NEXT:    li a5, 25
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs4r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v12, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a4, %hi(.LCPI6_0)
 ; RV32-NEXT:    addi a4, a4, %lo(.LCPI6_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT:    lui a5, %hi(.LCPI6_1)
+; RV32-NEXT:    addi a5, a5, %lo(.LCPI6_1)
+; RV32-NEXT:    lui a6, 1
 ; RV32-NEXT:    vle16.v v8, (a4)
+; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vle16.v v8, (a5)
 ; RV32-NEXT:    csrr a4, vlenb
 ; RV32-NEXT:    slli a4, a4, 2
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    lui a4, %hi(.LCPI6_1)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI6_1)
-; RV32-NEXT:    lui a5, 1
-; RV32-NEXT:    vle16.v v8, (a4)
-; RV32-NEXT:    addi a4, sp, 16
-; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vle32.v v16, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a4, 49
+; RV32-NEXT:    li a4, 37
 ; RV32-NEXT:    mul a1, a1, a4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vle32.v v24, (a3)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 53
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a1, a5, -64
+; RV32-NEXT:    addi a1, a6, -64
 ; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    slli a3, a1, 4
+; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v4
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v16, v4
-; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 21
+; RV32-NEXT:    li a3, 25
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmv.v.v v12, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 21
+; RV32-NEXT:    li a3, 25
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    slli a3, a1, 3
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl2r.v v10, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vi v8, v10, -2
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a3, 29
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vslideup.vi v12, v8, 2
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v12, v16, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT:    vadd.vi v8, v2, -8
+; RV32-NEXT:    vadd.vi v8, v10, -8
+; RV32-NEXT:    vmv2r.v v30, v10
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl1r.v v28, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vmv1r.v v0, v28
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv.v.v v20, v12
+; RV32-NEXT:    vmv.v.v v24, v12
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_2)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_2)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
@@ -308,165 +301,166 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 37
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v8, v0, v16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    slli a3, a1, 4
+; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 53
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v24, v16, v0.t
+; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v4, v0.t
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v20, v8
+; RV32-NEXT:    vmv.v.v v24, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    slli a3, a1, 4
+; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_4)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_4)
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 29
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v24, v16, v8
+; RV32-NEXT:    vrgatherei16.vv v4, v16, v8
 ; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vadd.vi v8, v30, -6
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 4
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl2r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vi v8, v8, -6
+; RV32-NEXT:    vs2r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v28
+; RV32-NEXT:    vmv1r.v v2, v28
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v24, v16, v8, v0.t
+; RV32-NEXT:    vrgatherei16.vv v4, v16, v8, v0.t
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_5)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_5)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_6)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_6)
-; RV32-NEXT:    vle16.v v16, (a1)
-; RV32-NEXT:    vle16.v v28, (a3)
+; RV32-NEXT:    vle16.v v20, (a1)
+; RV32-NEXT:    vle16.v v8, (a3)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a3, a1, 3
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 960
-; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vs1r.v v20, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vmv.s.x v1, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 37
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v0, v16
-; RV32-NEXT:    vmv1r.v v0, v20
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v24, v8, v20
+; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 53
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v16, v28, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a3, a1, 3
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v24, v16, v8, v0.t
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v24, v8
+; RV32-NEXT:    vmv.v.v v4, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    slli a3, a1, 3
+; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_7)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_7)
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    vle16.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 29
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v12, v16, v8
-; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 4
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl2r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vi v8, v8, -4
-; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
-; RV32-NEXT:    vmv.v.v v24, v12
+; RV32-NEXT:    vmv.v.v v4, v12
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_8)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_8)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_9)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_9)
 ; RV32-NEXT:    vle16.v v16, (a1)
-; RV32-NEXT:    vle16.v v28, (a3)
+; RV32-NEXT:    vle16.v v20, (a3)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 37
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v0, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v8, v24, v16
+; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 53
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v16, v28, v0.t
+; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v8, v24, v20, v0.t
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v24, v8
+; RV32-NEXT:    vmv.v.v v4, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 4
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_10)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_10)
@@ -474,20 +468,25 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    lui a1, 15
 ; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 29
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vslideup.vi v20, v16, 6
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl2r.v v10, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v20, v16, v10
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
@@ -502,13 +501,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a1, 1008
 ; RV32-NEXT:    vmv.s.x v28, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 29
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 37
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -516,7 +515,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v8, v0, v24
 ; RV32-NEXT:    vmv1r.v v0, v28
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 53
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -529,19 +528,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    vle16.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 12
+; RV32-NEXT:    li a3, 21
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
@@ -554,33 +553,33 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vle16.v v24, (a1)
 ; RV32-NEXT:    vle16.v v8, (a2)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a2, a1, 5
-; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 49
+; RV32-NEXT:    li a2, 37
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v8, v0, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 25
+; RV32-NEXT:    li a2, 29
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 41
+; RV32-NEXT:    li a2, 53
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a2, a1, 5
-; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
@@ -594,35 +593,37 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vse32.v v20, (a1)
 ; RV32-NEXT:    addi a1, a0, 192
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a3, a2, 4
-; RV32-NEXT:    add a2, a3, a2
+; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    addi a1, a0, 128
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 2
+; RV32-NEXT:    slli a3, a2, 3
+; RV32-NEXT:    add a2, a3, a2
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    addi a1, a0, 64
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    slli a3, a2, 4
+; RV32-NEXT:    add a2, a3, a2
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 21
+; RV32-NEXT:    li a2, 25
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a0)
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 58
+; RV32-NEXT:    li a1, 62
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index df41ac10f80d36..890707c6337fad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -14638,5 +14638,453 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
   %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
   ret <8 x i16> %v
 }
+
+; v32i64 is not a legal type, so make sure we don't try to combine the mgather
+; to a vlse intrinsic until it is legalized and split.
+define <32 x i64> @mgather_strided_split(ptr %base) {
+; RV32V-LABEL: mgather_strided_split:
+; RV32V:       # %bb.0:
+; RV32V-NEXT:    li a1, 16
+; RV32V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32V-NEXT:    vlse64.v v8, (a0), a1
+; RV32V-NEXT:    addi a0, a0, 256
+; RV32V-NEXT:    vlse64.v v16, (a0), a1
+; RV32V-NEXT:    ret
+;
+; RV64V-LABEL: mgather_strided_split:
+; RV64V:       # %bb.0:
+; RV64V-NEXT:    li a1, 16
+; RV64V-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64V-NEXT:    vlse64.v v8, (a0), a1
+; RV64V-NEXT:    addi a0, a0, 256
+; RV64V-NEXT:    vlse64.v v16, (a0), a1
+; RV64V-NEXT:    ret
+;
+; RV32ZVE32F-LABEL: mgather_strided_split:
+; RV32ZVE32F:       # %bb.0:
+; RV32ZVE32F-NEXT:    addi sp, sp, -512
+; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 512
+; RV32ZVE32F-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s2, 500(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s3, 496(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s4, 492(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s5, 488(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s6, 484(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s7, 480(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s8, 476(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s9, 472(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s10, 468(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    sw s11, 464(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    .cfi_offset ra, -4
+; RV32ZVE32F-NEXT:    .cfi_offset s0, -8
+; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
+; RV32ZVE32F-NEXT:    .cfi_offset s3, -16
+; RV32ZVE32F-NEXT:    .cfi_offset s4, -20
+; RV32ZVE32F-NEXT:    .cfi_offset s5, -24
+; RV32ZVE32F-NEXT:    .cfi_offset s6, -28
+; RV32ZVE32F-NEXT:    .cfi_offset s7, -32
+; RV32ZVE32F-NEXT:    .cfi_offset s8, -36
+; RV32ZVE32F-NEXT:    .cfi_offset s9, -40
+; RV32ZVE32F-NEXT:    .cfi_offset s10, -44
+; RV32ZVE32F-NEXT:    .cfi_offset s11, -48
+; RV32ZVE32F-NEXT:    addi s0, sp, 512
+; RV32ZVE32F-NEXT:    .cfi_def_cfa s0, 0
+; RV32ZVE32F-NEXT:    andi sp, sp, -128
+; RV32ZVE32F-NEXT:    li a2, 32
+; RV32ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; RV32ZVE32F-NEXT:    vid.v v8
+; RV32ZVE32F-NEXT:    vsll.vi v8, v8, 4
+; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v8
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 216(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 208(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 1
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 252(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 248(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 2
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 244(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 236(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 3
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 228(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 220(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 4
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 240(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 232(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 5
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 224(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 212(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 6
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 204(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 200(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    vslidedown.vi v16, v8, 7
+; RV32ZVE32F-NEXT:    vmv.x.s a1, v16
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 196(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 192(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    addi a1, sp, 256
+; RV32ZVE32F-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; RV32ZVE32F-NEXT:    vse32.v v8, (a1)
+; RV32ZVE32F-NEXT:    lw a1, 288(sp)
+; RV32ZVE32F-NEXT:    lw a2, 292(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 188(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 184(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 296(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a2)
+; RV32ZVE32F-NEXT:    sw a3, 180(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 4(a2)
+; RV32ZVE32F-NEXT:    sw a2, 176(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 300(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 172(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 168(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 304(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a2)
+; RV32ZVE32F-NEXT:    sw a3, 164(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 4(a2)
+; RV32ZVE32F-NEXT:    sw a2, 160(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 308(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 156(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 152(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 312(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a2)
+; RV32ZVE32F-NEXT:    sw a3, 148(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 4(a2)
+; RV32ZVE32F-NEXT:    sw a2, 144(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 316(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 140(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 320(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a2)
+; RV32ZVE32F-NEXT:    sw a3, 132(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 4(a2)
+; RV32ZVE32F-NEXT:    sw a2, 128(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 324(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 124(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 328(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a2)
+; RV32ZVE32F-NEXT:    sw a3, 116(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 4(a2)
+; RV32ZVE32F-NEXT:    sw a2, 112(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 332(sp)
+; RV32ZVE32F-NEXT:    lw a3, 0(a1)
+; RV32ZVE32F-NEXT:    sw a3, 104(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw ra, 4(a1)
+; RV32ZVE32F-NEXT:    lw a1, 336(sp)
+; RV32ZVE32F-NEXT:    lw s10, 0(a2)
+; RV32ZVE32F-NEXT:    lw s8, 4(a2)
+; RV32ZVE32F-NEXT:    lw a2, 340(sp)
+; RV32ZVE32F-NEXT:    lw s6, 0(a1)
+; RV32ZVE32F-NEXT:    lw s4, 4(a1)
+; RV32ZVE32F-NEXT:    lw a4, 344(sp)
+; RV32ZVE32F-NEXT:    lw s2, 0(a2)
+; RV32ZVE32F-NEXT:    lw t5, 4(a2)
+; RV32ZVE32F-NEXT:    lw a2, 348(sp)
+; RV32ZVE32F-NEXT:    lw t3, 0(a4)
+; RV32ZVE32F-NEXT:    lw t2, 4(a4)
+; RV32ZVE32F-NEXT:    lw a4, 352(sp)
+; RV32ZVE32F-NEXT:    lw t0, 0(a2)
+; RV32ZVE32F-NEXT:    lw a7, 4(a2)
+; RV32ZVE32F-NEXT:    lw a2, 356(sp)
+; RV32ZVE32F-NEXT:    lw a6, 0(a4)
+; RV32ZVE32F-NEXT:    lw a5, 4(a4)
+; RV32ZVE32F-NEXT:    lw a4, 360(sp)
+; RV32ZVE32F-NEXT:    lw a1, 0(a2)
+; RV32ZVE32F-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a1, 4(a2)
+; RV32ZVE32F-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT:    lw a2, 364(sp)
+; RV32ZVE32F-NEXT:    lw s11, 0(a4)
+; RV32ZVE32F-NEXT:    lw s9, 4(a4)
+; RV32ZVE32F-NEXT:    lw a1, 368(sp)
+; RV32ZVE32F-NEXT:    lw s7, 0(a2)
+; RV32ZVE32F-NEXT:    lw s5, 4(a2)
+; RV32ZVE32F-NEXT:    lw a3, 372(sp)
+; RV32ZVE32F-NEXT:    lw s3, 0(a1)
+; RV32ZVE32F-NEXT:    lw t6, 4(a1)
+; RV32ZVE32F-NEXT:    lw a2, 376(sp)
+; RV32ZVE32F-NEXT:    lw t4, 0(a3)
+; RV32ZVE32F-NEXT:    lw a1, 380(sp)
+; RV32ZVE32F-NEXT:    lw t1, 4(a3)
+; RV32ZVE32F-NEXT:    lw a4, 0(a2)
+; RV32ZVE32F-NEXT:    lw a3, 4(a2)
+; RV32ZVE32F-NEXT:    lw a2, 0(a1)
+; RV32ZVE32F-NEXT:    lw a1, 4(a1)
+; RV32ZVE32F-NEXT:    sw a5, 196(a0)
+; RV32ZVE32F-NEXT:    sw a6, 192(a0)
+; RV32ZVE32F-NEXT:    sw a7, 188(a0)
+; RV32ZVE32F-NEXT:    sw t0, 184(a0)
+; RV32ZVE32F-NEXT:    sw t2, 180(a0)
+; RV32ZVE32F-NEXT:    sw t3, 176(a0)
+; RV32ZVE32F-NEXT:    sw t5, 172(a0)
+; RV32ZVE32F-NEXT:    sw s2, 168(a0)
+; RV32ZVE32F-NEXT:    sw s4, 164(a0)
+; RV32ZVE32F-NEXT:    sw s6, 160(a0)
+; RV32ZVE32F-NEXT:    sw s8, 156(a0)
+; RV32ZVE32F-NEXT:    sw s10, 152(a0)
+; RV32ZVE32F-NEXT:    sw ra, 148(a0)
+; RV32ZVE32F-NEXT:    lw a5, 104(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 144(a0)
+; RV32ZVE32F-NEXT:    lw a5, 112(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 140(a0)
+; RV32ZVE32F-NEXT:    lw a5, 116(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 136(a0)
+; RV32ZVE32F-NEXT:    lw a5, 120(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 132(a0)
+; RV32ZVE32F-NEXT:    lw a5, 124(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 128(a0)
+; RV32ZVE32F-NEXT:    lw a5, 128(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 124(a0)
+; RV32ZVE32F-NEXT:    lw a5, 132(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 120(a0)
+; RV32ZVE32F-NEXT:    lw a5, 136(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 116(a0)
+; RV32ZVE32F-NEXT:    lw a5, 140(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 112(a0)
+; RV32ZVE32F-NEXT:    lw a5, 144(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 108(a0)
+; RV32ZVE32F-NEXT:    lw a5, 148(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 104(a0)
+; RV32ZVE32F-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 100(a0)
+; RV32ZVE32F-NEXT:    lw a5, 156(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 96(a0)
+; RV32ZVE32F-NEXT:    lw a5, 160(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 92(a0)
+; RV32ZVE32F-NEXT:    lw a5, 164(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 88(a0)
+; RV32ZVE32F-NEXT:    lw a5, 168(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 84(a0)
+; RV32ZVE32F-NEXT:    lw a5, 172(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 80(a0)
+; RV32ZVE32F-NEXT:    lw a5, 176(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 76(a0)
+; RV32ZVE32F-NEXT:    lw a5, 180(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 72(a0)
+; RV32ZVE32F-NEXT:    lw a5, 184(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 68(a0)
+; RV32ZVE32F-NEXT:    lw a5, 188(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 64(a0)
+; RV32ZVE32F-NEXT:    lw a5, 208(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 4(a0)
+; RV32ZVE32F-NEXT:    lw a5, 216(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a5, 0(a0)
+; RV32ZVE32F-NEXT:    sw a1, 252(a0)
+; RV32ZVE32F-NEXT:    sw a2, 248(a0)
+; RV32ZVE32F-NEXT:    sw a3, 244(a0)
+; RV32ZVE32F-NEXT:    sw a4, 240(a0)
+; RV32ZVE32F-NEXT:    sw t1, 236(a0)
+; RV32ZVE32F-NEXT:    sw t4, 232(a0)
+; RV32ZVE32F-NEXT:    sw t6, 228(a0)
+; RV32ZVE32F-NEXT:    sw s3, 224(a0)
+; RV32ZVE32F-NEXT:    sw s5, 220(a0)
+; RV32ZVE32F-NEXT:    sw s7, 216(a0)
+; RV32ZVE32F-NEXT:    sw s9, 212(a0)
+; RV32ZVE32F-NEXT:    sw s11, 208(a0)
+; RV32ZVE32F-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 204(a0)
+; RV32ZVE32F-NEXT:    lw a1, 108(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 200(a0)
+; RV32ZVE32F-NEXT:    lw a1, 220(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 28(a0)
+; RV32ZVE32F-NEXT:    lw a1, 228(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 24(a0)
+; RV32ZVE32F-NEXT:    lw a1, 236(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 20(a0)
+; RV32ZVE32F-NEXT:    lw a1, 244(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 16(a0)
+; RV32ZVE32F-NEXT:    lw a1, 248(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 12(a0)
+; RV32ZVE32F-NEXT:    lw a1, 252(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 8(a0)
+; RV32ZVE32F-NEXT:    lw a1, 192(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 60(a0)
+; RV32ZVE32F-NEXT:    lw a1, 196(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 56(a0)
+; RV32ZVE32F-NEXT:    lw a1, 200(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 52(a0)
+; RV32ZVE32F-NEXT:    lw a1, 204(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 48(a0)
+; RV32ZVE32F-NEXT:    lw a1, 212(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 44(a0)
+; RV32ZVE32F-NEXT:    lw a1, 224(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 40(a0)
+; RV32ZVE32F-NEXT:    lw a1, 232(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 36(a0)
+; RV32ZVE32F-NEXT:    lw a1, 240(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    sw a1, 32(a0)
+; RV32ZVE32F-NEXT:    addi sp, s0, -512
+; RV32ZVE32F-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s2, 500(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s3, 496(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s4, 492(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s5, 488(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s6, 484(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s7, 480(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s8, 476(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s9, 472(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s10, 468(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    lw s11, 464(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT:    addi sp, sp, 512
+; RV32ZVE32F-NEXT:    ret
+;
+; RV64ZVE32F-LABEL: mgather_strided_split:
+; RV64ZVE32F:       # %bb.0:
+; RV64ZVE32F-NEXT:    addi sp, sp, -144
+; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 144
+; RV64ZVE32F-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    .cfi_offset ra, -8
+; RV64ZVE32F-NEXT:    .cfi_offset s0, -16
+; RV64ZVE32F-NEXT:    .cfi_offset s1, -24
+; RV64ZVE32F-NEXT:    .cfi_offset s2, -32
+; RV64ZVE32F-NEXT:    .cfi_offset s3, -40
+; RV64ZVE32F-NEXT:    .cfi_offset s4, -48
+; RV64ZVE32F-NEXT:    .cfi_offset s5, -56
+; RV64ZVE32F-NEXT:    .cfi_offset s6, -64
+; RV64ZVE32F-NEXT:    .cfi_offset s7, -72
+; RV64ZVE32F-NEXT:    .cfi_offset s8, -80
+; RV64ZVE32F-NEXT:    .cfi_offset s9, -88
+; RV64ZVE32F-NEXT:    .cfi_offset s10, -96
+; RV64ZVE32F-NEXT:    .cfi_offset s11, -104
+; RV64ZVE32F-NEXT:    ld a2, 0(a1)
+; RV64ZVE32F-NEXT:    sd a2, 32(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    ld a2, 16(a1)
+; RV64ZVE32F-NEXT:    sd a2, 24(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    ld a2, 32(a1)
+; RV64ZVE32F-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    ld a2, 48(a1)
+; RV64ZVE32F-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    ld a2, 64(a1)
+; RV64ZVE32F-NEXT:    sd a2, 0(sp) # 8-byte Folded Spill
+; RV64ZVE32F-NEXT:    ld a7, 80(a1)
+; RV64ZVE32F-NEXT:    ld t0, 96(a1)
+; RV64ZVE32F-NEXT:    ld t1, 112(a1)
+; RV64ZVE32F-NEXT:    ld t2, 128(a1)
+; RV64ZVE32F-NEXT:    ld t3, 144(a1)
+; RV64ZVE32F-NEXT:    ld t4, 160(a1)
+; RV64ZVE32F-NEXT:    ld t5, 176(a1)
+; RV64ZVE32F-NEXT:    ld t6, 192(a1)
+; RV64ZVE32F-NEXT:    ld s0, 208(a1)
+; RV64ZVE32F-NEXT:    ld s1, 224(a1)
+; RV64ZVE32F-NEXT:    ld s2, 240(a1)
+; RV64ZVE32F-NEXT:    ld s3, 256(a1)
+; RV64ZVE32F-NEXT:    ld s4, 272(a1)
+; RV64ZVE32F-NEXT:    ld s5, 288(a1)
+; RV64ZVE32F-NEXT:    ld s6, 304(a1)
+; RV64ZVE32F-NEXT:    ld s7, 320(a1)
+; RV64ZVE32F-NEXT:    ld s8, 336(a1)
+; RV64ZVE32F-NEXT:    ld s9, 352(a1)
+; RV64ZVE32F-NEXT:    ld s10, 368(a1)
+; RV64ZVE32F-NEXT:    ld s11, 384(a1)
+; RV64ZVE32F-NEXT:    ld ra, 400(a1)
+; RV64ZVE32F-NEXT:    ld a6, 416(a1)
+; RV64ZVE32F-NEXT:    ld a5, 432(a1)
+; RV64ZVE32F-NEXT:    ld a2, 496(a1)
+; RV64ZVE32F-NEXT:    ld a3, 480(a1)
+; RV64ZVE32F-NEXT:    ld a4, 464(a1)
+; RV64ZVE32F-NEXT:    ld a1, 448(a1)
+; RV64ZVE32F-NEXT:    sd a2, 248(a0)
+; RV64ZVE32F-NEXT:    sd a3, 240(a0)
+; RV64ZVE32F-NEXT:    sd a4, 232(a0)
+; RV64ZVE32F-NEXT:    sd a1, 224(a0)
+; RV64ZVE32F-NEXT:    sd a5, 216(a0)
+; RV64ZVE32F-NEXT:    sd a6, 208(a0)
+; RV64ZVE32F-NEXT:    sd ra, 200(a0)
+; RV64ZVE32F-NEXT:    sd s11, 192(a0)
+; RV64ZVE32F-NEXT:    sd s10, 184(a0)
+; RV64ZVE32F-NEXT:    sd s9, 176(a0)
+; RV64ZVE32F-NEXT:    sd s8, 168(a0)
+; RV64ZVE32F-NEXT:    sd s7, 160(a0)
+; RV64ZVE32F-NEXT:    sd s6, 152(a0)
+; RV64ZVE32F-NEXT:    sd s5, 144(a0)
+; RV64ZVE32F-NEXT:    sd s4, 136(a0)
+; RV64ZVE32F-NEXT:    sd s3, 128(a0)
+; RV64ZVE32F-NEXT:    sd s2, 120(a0)
+; RV64ZVE32F-NEXT:    sd s1, 112(a0)
+; RV64ZVE32F-NEXT:    sd s0, 104(a0)
+; RV64ZVE32F-NEXT:    sd t6, 96(a0)
+; RV64ZVE32F-NEXT:    sd t5, 88(a0)
+; RV64ZVE32F-NEXT:    sd t4, 80(a0)
+; RV64ZVE32F-NEXT:    sd t3, 72(a0)
+; RV64ZVE32F-NEXT:    sd t2, 64(a0)
+; RV64ZVE32F-NEXT:    sd t1, 56(a0)
+; RV64ZVE32F-NEXT:    sd t0, 48(a0)
+; RV64ZVE32F-NEXT:    sd a7, 40(a0)
+; RV64ZVE32F-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    sd a1, 32(a0)
+; RV64ZVE32F-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    sd a1, 24(a0)
+; RV64ZVE32F-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    sd a1, 16(a0)
+; RV64ZVE32F-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    sd a1, 8(a0)
+; RV64ZVE32F-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    sd a1, 0(a0)
+; RV64ZVE32F-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT:    addi sp, sp, 144
+; RV64ZVE32F-NEXT:    ret
+  %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38, i64 40, i64 42, i64 44, i64 46, i64 48, i64 50, i64 52, i64 54, i64 56, i64 58, i64 60, i64 62>
+  %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> shufflevector(<32 x i1> insertelement(<32 x i1> poison, i1 true, i32 0), <32 x i1> poison, <32 x i32> zeroinitializer), <32 x i64> poison)
+  ret <32 x i64> %x
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
index a34fa9502d93b3..d0777962a75651 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll
@@ -8,11 +8,13 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
 ; CHECK-LABEL: trn1.v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i8> %tmp0
@@ -22,11 +24,13 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
 ; CHECK-LABEL: trn2.v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i8> %tmp0
@@ -36,14 +40,16 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 ; CHECK-LABEL: trn1.v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vadd.vi v8, v11, -1
 ; CHECK-NEXT:    lui a0, 11
 ; CHECK-NEXT:    addi a0, a0, -1366
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   ret <16 x i8> %tmp0
@@ -53,14 +59,16 @@ define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 ; CHECK-LABEL: trn2.v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    lui a0, 11
 ; CHECK-NEXT:    addi a0, a0, -1366
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %tmp0
@@ -70,10 +78,12 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
 ; CHECK-LABEL: trn1.v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i16> %tmp0
@@ -83,10 +93,12 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
 ; CHECK-LABEL: trn2.v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i16> %tmp0
@@ -96,11 +108,13 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
 ; CHECK-LABEL: trn1.v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i16> %tmp0
@@ -110,11 +124,13 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
 ; CHECK-LABEL: trn2.v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i16> %tmp0
@@ -147,10 +163,12 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: trn1.v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i32> %tmp0
@@ -160,10 +178,12 @@ define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: trn2.v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i32> %tmp0
@@ -219,10 +239,12 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
 ; CHECK-LABEL: trn1.v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x float> %tmp0
@@ -232,10 +254,12 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
 ; CHECK-LABEL: trn2.v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x float> %tmp0
@@ -268,10 +292,12 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
 ; CHECK-LABEL: trn1.v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x half> %tmp0
@@ -281,10 +307,12 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
 ; CHECK-LABEL: trn2.v4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x half> %tmp0
@@ -294,11 +322,13 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
 ; CHECK-LABEL: trn1.v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vadd.vi v10, v10, -1
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vadd.vi v8, v11, -1
+; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x half> %tmp0
@@ -308,11 +338,13 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
 ; CHECK-LABEL: trn2.v8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vid.v v11
+; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    li a0, 170
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vv v10, v8, v12
+; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x half> %tmp0
diff --git a/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll
new file mode 100644
index 00000000000000..3587ecb7f3c94a
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/reserved-arg-regs.ll
@@ -0,0 +1,25 @@
+;; Test reserving argument registers.
+; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0
+; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-o0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-O0
+; RUN: not llc < %s -mtriple=sparc-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0
+; RUN: not llc < %s -mtriple=sparc64-linux-gnu -mattr=+reserve-i0 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-I0
+
+; CHECK-RESERVED-O0: error:
+; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved.
+; CHECK-RESERVED-I0: error:
+; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved.
+define void @call_function() {
+  call void @foo()
+  ret void
+}
+declare void @foo()
+
+; CHECK-RESERVED-O0: error:
+; CHECK-RESERVED-O0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved.
+; CHECK-RESERVED-I0: error:
+; CHECK-RESERVED-I0-SAME: SPARC doesn't support function calls if any of the argument registers is reserved.
+define void @call_function_with_arg(i8 %in) {
+  call void @bar(i8 %in)
+  ret void
+}
+declare void @bar(i8)
diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-named.ll b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll
new file mode 100644
index 00000000000000..91808be156c559
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/reserved-regs-named.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l0 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L0
+
+;; Ensure explicit register references are catched as well.
+
+; CHECK-RESERVED-L0: %l0
+define void @set_reg(i32 zeroext %x) {
+entry:
+  tail call void @llvm.write_register.i32(metadata !0, i32 %x)
+  ret void
+}
+
+declare void @llvm.write_register.i32(metadata, i32)
+!0 = !{!"l0"}
diff --git a/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll
new file mode 100644
index 00000000000000..53ca045f100443
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/reserved-regs-unavailable.ll
@@ -0,0 +1,14 @@
+; RUN: not --crash llc -mtriple=sparc64-linux-gnu -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK-RESERVED-L0
+
+;; Ensure explicit register references for non-reserved registers
+;; are caught properly.
+
+; CHECK-RESERVED-L0: LLVM ERROR: Invalid register name global variable
+define void @set_reg(i32 zeroext %x) {
+entry:
+  tail call void @llvm.write_register.i32(metadata !0, i32 %x)
+  ret void
+}
+
+declare void @llvm.write_register.i32(metadata, i32)
+!0 = !{!"l0"}
diff --git a/llvm/test/CodeGen/SPARC/reserved-regs.ll b/llvm/test/CodeGen/SPARC/reserved-regs.ll
index ec6290586eeef2..7dea1f31538b84 100644
--- a/llvm/test/CodeGen/SPARC/reserved-regs.ll
+++ b/llvm/test/CodeGen/SPARC/reserved-regs.ll
@@ -1,5 +1,14 @@
 ; RUN: llc -march=sparc -verify-machineinstrs < %s | FileCheck %s
 
+;; Test reserve-* options.
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-o1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-O1
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-l1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-L1
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-I1
+
+;; Test multiple reserve-* options together.
+; RUN: llc -mtriple=sparc64-linux-gnu -mattr=+reserve-g1 -mattr=+reserve-o1 -mattr=+reserve-l1 -mattr=+reserve-i1 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVED-G1,CHECK-RESERVED-O1,CHECK-RESERVED-L1,CHECK-RESERVED-I1
+
 @g = common global [32 x i32] zeroinitializer, align 16
 @h = common global [16 x i64] zeroinitializer, align 16
 
@@ -16,6 +25,10 @@
 ; CHECK-NOT: %o6
 ; CHECK-NOT: %i6
 ; CHECK-NOT: %i7
+; CHECK-RESERVED-G1-NOT: %g1
+; CHECK-RESERVED-O1-NOT: %o1
+; CHECK-RESERVED-L1-NOT: %l1
+; CHECK-RESERVED-I1-NOT: %i1
 ; CHECK: ret
 define void @use_all_i32_regs() {
 entry:
@@ -100,6 +113,10 @@ entry:
 ; CHECK-NOT: %o7
 ; CHECK-NOT: %i6
 ; CHECK-NOT: %i7
+; CHECK-RESERVED-G1-NOT: %g1
+; CHECK-RESERVED-O1-NOT: %o1
+; CHECK-RESERVED-L1-NOT: %l1
+; CHECK-RESERVED-I1-NOT: %i1
 ; CHECK: ret
 define void @use_all_i64_regs() {
 entry:
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index 7f8115dc1ce389..b5f3e789918813 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -1282,3 +1282,35 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
   %tmp8 = or i32 %tmp7, %tmp30
   ret i32 %tmp8
 }
+
+define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
+; CHECK-LABEL: pr80911_vector_load_multiuse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl (%edx), %esi
+; CHECK-NEXT:    movzwl (%edx), %eax
+; CHECK-NEXT:    movl $0, (%ecx)
+; CHECK-NEXT:    movl %esi, (%edx)
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    retl
+;
+; CHECK64-LABEL: pr80911_vector_load_multiuse:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    movl (%rdi), %ecx
+; CHECK64-NEXT:    movzwl (%rdi), %eax
+; CHECK64-NEXT:    movl $0, (%rsi)
+; CHECK64-NEXT:    movl %ecx, (%rdi)
+; CHECK64-NEXT:    retq
+  %load = load <4 x i8>, ptr %ptr, align 16
+  store i32 0, ptr %clobber
+  store <4 x i8> %load, ptr %ptr, align 16
+  %e1 = extractelement <4 x i8> %load, i64 1
+  %e1.ext = zext i8 %e1 to i32
+  %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
+  %e0 = extractelement <4 x i8> %load, i64 0
+  %e0.ext = zext i8 %e0 to i32
+  %res = or i32 %e1.ext.shift, %e0.ext
+  ret i32 %res
+}
diff --git a/llvm/test/MC/Mips/macro-la-pic.s b/llvm/test/MC/Mips/macro-la-pic.s
index 2303f34c35bcfe..1875952d80c4e7 100644
--- a/llvm/test/MC/Mips/macro-la-pic.s
+++ b/llvm/test/MC/Mips/macro-la-pic.s
@@ -255,3 +255,25 @@ la $25, 2f
 # XN32: lw $25, %got_disp(.Ltmp1)($gp)  # encoding: [0x8f,0x99,A,A]
 # XN32:                                 #   fixup A - offset: 0, value: %got_disp(.Ltmp1), kind: fixup_Mips_GOT_DISP
 2:
+
+la $2,.Lstr
+# O32:      lw      $2, %got(.Lstr)($gp)          # encoding: [0x8f,0x82,A,A]
+# O32-NEXT:                           #   fixup A - offset: 0, value: %got(.Lstr), kind: fixup_Mips_GOT
+# O32-NEXT: addiu   $2, $2, %lo(.Lstr)            # encoding: [0x24,0x42,A,A]
+# O32-NEXT:                           #   fixup A - offset: 0, value: %lo(.Lstr), kind: fixup_Mips_LO16
+
+# N32:      lw      $2, %got_disp(.Lstr)($gp)     # encoding: [0x8f,0x82,A,A]
+# N32-NEXT:                           #   fixup A - offset: 0, value: %got_disp(.Lstr), kind: fixup_Mips_GOT_DISP
+
+la $2,$str2
+# O32:      lw      $2, %got($str2)($gp)          # encoding: [0x8f,0x82,A,A]
+# O32-NEXT: #   fixup A - offset: 0, value: %got($str2), kind: fixup_Mips_GOT
+# O32-NEXT: addiu   $2, $2, %lo($str2)            # encoding: [0x24,0x42,A,A]
+# O32-NEXT: #   fixup A - offset: 0, value: %lo($str2), kind: fixup_Mips_LO16
+
+# N32:      lw      $2, %got_disp($str2)($gp)     # encoding: [0x8f,0x82,A,A]
+# N32-NEXT:                           #   fixup A - offset: 0, value: %got_disp($str2), kind: fixup_Mips_GOT_DISP
+
+.rodata
+.Lstr: .4byte 0
+$str2: .4byte 0
diff --git a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll
index 0ba114117ceec6..ee36f949529d4f 100644
--- a/llvm/test/Transforms/Attributor/nofpclass-fpext.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass-fpext.ll
@@ -142,7 +142,7 @@ define double @ret_fpext_f32_to_f64_nosub(float nofpclass(sub) %arg0) {
 }
 
 define double @ret_fpext_f32_to_f64_nonorm(float nofpclass(norm) %arg0) {
-; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_f32_to_f64_nonorm
+; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_f32_to_f64_nonorm
 ; CHECK-SAME: (float nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXT:%.*]] = fpext float [[ARG0]] to double
 ; CHECK-NEXT:    ret double [[EXT]]
@@ -482,7 +482,37 @@ define double @ret_fpext_bf16_f64_nosub(bfloat nofpclass(sub) %arg0) {
 }
 
 define double @ret_fpext_bf16_f64_nonorm(bfloat nofpclass(norm) %arg0) {
-; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm
+; CHECK-LABEL: define nofpclass(sub) double @ret_fpext_bf16_f64_nonorm
+; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXT:%.*]] = fpext bfloat [[ARG0]] to double
+; CHECK-NEXT:    ret double [[EXT]]
+;
+  %ext = fpext bfloat %arg0 to double
+  ret double %ext
+}
+
+define double @ret_fpext_bf16_f64_nonorm_psub(bfloat nofpclass(norm psub) %arg0) {
+; CHECK-LABEL: define nofpclass(sub pnorm) double @ret_fpext_bf16_f64_nonorm_psub
+; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXT:%.*]] = fpext bfloat [[ARG0]] to double
+; CHECK-NEXT:    ret double [[EXT]]
+;
+  %ext = fpext bfloat %arg0 to double
+  ret double %ext
+}
+
+define double @ret_fpext_bf16_f64_nonorm_nsub(bfloat nofpclass(norm nsub) %arg0) {
+; CHECK-LABEL: define nofpclass(sub nnorm) double @ret_fpext_bf16_f64_nonorm_nsub
+; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[EXT:%.*]] = fpext bfloat [[ARG0]] to double
+; CHECK-NEXT:    ret double [[EXT]]
+;
+  %ext = fpext bfloat %arg0 to double
+  ret double %ext
+}
+
+define double @ret_fpext_bf16_f64_nonorm_sub(bfloat nofpclass(norm sub) %arg0) {
+; CHECK-LABEL: define nofpclass(sub norm) double @ret_fpext_bf16_f64_nonorm_sub
 ; CHECK-SAME: (bfloat nofpclass(sub norm) [[ARG0:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXT:%.*]] = fpext bfloat [[ARG0]] to double
 ; CHECK-NEXT:    ret double [[EXT]]
diff --git a/llvm/test/Transforms/IndVarSimplify/pr55925.ll b/llvm/test/Transforms/IndVarSimplify/pr55925.ll
index 420fc209949d4f..312a8295ccdc9f 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr55925.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr55925.ll
@@ -18,9 +18,9 @@ define void @test(ptr %p) personality ptr undef {
 ; CHECK-NEXT:    [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]])
 ; CHECK-NEXT:            to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]]
 ; CHECK:       loop.latch:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]])
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
@@ -64,8 +64,8 @@ define void @test_critedge(i1 %c, ptr %p) personality ptr undef {
 ; CHECK-NEXT:    br label [[LOOP_LATCH]]
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[TMP1]], [[LOOP_INVOKE]] ], [ 0, [[LOOP_OTHER]] ]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @foo(i32 [[PHI]])
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
diff --git a/llvm/test/Transforms/IndVarSimplify/pr79861.ll b/llvm/test/Transforms/IndVarSimplify/pr79861.ll
new file mode 100644
index 00000000000000..66250944961397
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr79861.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+target datalayout = "n64"
+
+declare void @use(i64)
+
+define void @or_disjoint() {
+; CHECK-LABEL: define void @or_disjoint() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 2, [[ENTRY:%.*]] ], [ [[IV_DEC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 false, i64 [[OR]], i64 [[ADD]]
+; CHECK-NEXT:    call void @use(i64 [[SEL]])
+; CHECK-NEXT:    [[IV_DEC]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_DEC]], 0
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 2, %entry ], [ %iv.dec, %loop ]
+  %or = or disjoint i64 %iv, 1
+  %add = add nsw i64 %iv, 1
+  %sel = select i1 false, i64 %or, i64 %add
+  call void @use(i64 %sel)
+
+  %iv.dec = add nsw i64 %iv, -1
+  %exit.cond = icmp eq i64 %iv.dec, 0
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @add_nowrap_flags(i64 %n) {
+; CHECK-LABEL: define void @add_nowrap_flags(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IV]], 123
+; CHECK-NEXT:    call void @use(i64 [[ADD1]])
+; CHECK-NEXT:    [[IV_INC]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_INC]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ]
+  %add1 = add nuw nsw i64 %iv, 123
+  %add2 = add i64 %iv, 123
+  %sel = select i1 false, i64 %add1, i64 %add2
+  call void @use(i64 %sel)
+
+  %iv.inc = add i64 %iv, 1
+  %exit.cond = icmp eq i64 %iv.inc, %n
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+
+define void @expander_or_disjoint(i64 %n) {
+; CHECK-LABEL: define void @expander_or_disjoint(
+; CHECK-SAME: i64 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[N]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_INC]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[IV]], [[OR]]
+; CHECK-NEXT:    call void @use(i64 [[ADD]])
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_INC]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %or = or disjoint i64 %n, 1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ]
+  %iv.inc = add i64 %iv, 1
+  %add = add i64 %iv, %or
+  call void @use(i64 %add)
+  %cmp = icmp ult i64 %iv, %n
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 642c3eb2a0e41b..c90b6c9fb29592 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -116,6 +116,7 @@ define void @test_overaligned_vec(i8 %B) {
 ; CHECK-LABEL: @test_overaligned_vec(
 ; CHECK-NEXT:    store i8 [[B:%.*]], ptr getelementptr inbounds ([10 x i8], ptr @Global, i64 0, i64 2), align 1
 ; CHECK-NEXT:    ret void
+;
   %A = getelementptr <2 x half>, ptr @Global, i64 0, i64 1
   store i8 %B, ptr %A
   ret void
@@ -1473,6 +1474,16 @@ define ptr @gep_sdiv(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define ptr @gep_udiv(ptr %p, i64 %off) {
+; CHECK-LABEL: @gep_udiv(
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]]
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+  %index = udiv exact i64 %off, 7
+  %ptr = getelementptr %struct.C, ptr %p, i64 %index
+  ret ptr %ptr
+}
+
 define <2 x ptr> @gep_sdiv_vec(<2 x ptr> %p, <2 x i64> %off) {
 ; CHECK-LABEL: @gep_sdiv_vec(
 ; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, <2 x ptr> [[P:%.*]], <2 x i64> [[OFF:%.*]]
@@ -1503,6 +1514,16 @@ define ptr @gep_ashr(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define ptr @gep_lshr(ptr %p, i64 %off) {
+; CHECK-LABEL: @gep_lshr(
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFF:%.*]]
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+  %index = lshr exact i64 %off, 2
+  %ptr = getelementptr i32, ptr %p, i64 %index
+  ret ptr %ptr
+}
+
 ; Negative tests
 
 define ptr @gep_i8(ptr %p, i64 %off) {
@@ -1525,6 +1546,17 @@ define ptr @gep_sdiv_mismatched_size(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define ptr @gep_udiv_mismatched_size(ptr %p, i64 %off) {
+; CHECK-LABEL: @gep_udiv_mismatched_size(
+; CHECK-NEXT:    [[INDEX:%.*]] = udiv exact i64 [[OFF:%.*]], 20
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+  %index = udiv exact i64 %off, 20
+  %ptr = getelementptr %struct.C, ptr %p, i64 %index
+  ret ptr %ptr
+}
+
 define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) {
 ; CHECK-LABEL: @gep_sdiv_without_exact(
 ; CHECK-NEXT:    [[INDEX:%.*]] = sdiv i64 [[OFF:%.*]], 7
@@ -1536,6 +1568,17 @@ define ptr @gep_sdiv_without_exact(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define ptr @gep_udiv_without_exact(ptr %p, i64 %off) {
+; CHECK-LABEL: @gep_udiv_without_exact(
+; CHECK-NEXT:    [[INDEX:%.*]] = udiv i64 [[OFF:%.*]], 7
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+  %index = udiv i64 %off, 7
+  %ptr = getelementptr %struct.C, ptr %p, i64 %index
+  ret ptr %ptr
+}
+
 define ptr @gep_ashr_without_exact(ptr %p, i64 %off) {
 ; CHECK-LABEL: @gep_ashr_without_exact(
 ; CHECK-NEXT:    [[INDEX:%.*]] = ashr i64 [[OFF:%.*]], 2
@@ -1547,6 +1590,17 @@ define ptr @gep_ashr_without_exact(ptr %p, i64 %off) {
   ret ptr %ptr
 }
 
+define ptr @gep_lshr_without_exact(ptr %p, i64 %off) {
+; CHECK-LABEL: @gep_lshr_without_exact(
+; CHECK-NEXT:    [[INDEX:%.*]] = lshr i64 [[OFF:%.*]], 2
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+  %index = lshr i64 %off, 2
+  %ptr = getelementptr i32, ptr %p, i64 %index
+  ret ptr %ptr
+}
+
 define i1 @test_only_used_by_icmp(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_only_used_by_icmp(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[B:%.*]], [[C:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/pr80941.ll b/llvm/test/Transforms/InstCombine/pr80941.ll
new file mode 100644
index 00000000000000..95242b1d1407bf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr80941.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define float @pr80941(float %arg) {
+; CHECK-LABEL: define float @pr80941(
+; CHECK-SAME: float [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[ARG]], i32 144)
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_EXIT:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[FPEXT:%.*]] = fpext float [[ARG]] to double
+; CHECK-NEXT:    [[SIGN:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[FPEXT]])
+; CHECK-NEXT:    [[FPTRUNC:%.*]] = fptrunc double [[SIGN]] to float
+; CHECK-NEXT:    br label [[IF_EXIT]]
+; CHECK:       if.exit:
+; CHECK-NEXT:    [[RET:%.*]] = phi float [ [[FPTRUNC]], [[IF_THEN]] ], [ [[ARG]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float [[RET]]
+;
+entry:
+  %cond = tail call i1 @llvm.is.fpclass.f32(float %arg, i32 144)
+  br i1 %cond, label %if.then, label %if.exit
+
+if.then:
+  %fpext = fpext float %arg to double
+  %sign = call double @llvm.copysign.f64(double 0.000000e+00, double %fpext)
+  %fptrunc = fptrunc double %sign to float
+  br label %if.exit
+
+if.exit:
+  %ret = phi float [ %fptrunc, %if.then ], [ %arg, %entry ]
+  ret float %ret
+}
diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll
new file mode 100644
index 00000000000000..034af89112cb63
--- /dev/null
+++ b/llvm/test/Transforms/LowerTypeTests/cfi-annotation.ll
@@ -0,0 +1,68 @@
+; REQUIRES: aarch64-registered-target
+
+; RUN: opt -passes=lowertypetests %s -o %t.o
+; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-foobar
+; CHECK-foobar: {{llvm.global.annotations = .*[foo|bar], .*[foo|bar],}}
+; RUN: llvm-dis %t.o -o - | FileCheck %s --check-prefix=CHECK-cfi
+; CHECK-cfi-NOT: {{llvm.global.annotations = .*cfi.*}}
+
+target triple = "aarch64-none-linux-gnu"
+
+@.src = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
+@.str = private unnamed_addr constant [30 x i8] c"annotation_string_literal_bar\00", section "llvm.metadata"
+@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", section "llvm.metadata"
+@.str.2 = private unnamed_addr constant [30 x i8] c"annotation_string_literal_foo\00", section "llvm.metadata"
+@llvm.global.annotations = appending global [2 x { ptr, ptr, ptr, i32, ptr }] [{ ptr, ptr, ptr, i32, ptr } { ptr @bar, ptr @.str, ptr @.str.1, i32 2, ptr null }, { ptr, ptr, ptr, i32, ptr } { ptr @foo, ptr @.str.2, ptr @.str.1, i32 1, ptr null }], section "llvm.metadata"
+
+define i32 @bar(i32 noundef %0) #0 !type !8 !type !9 {
+  %2 = alloca i32, align 4
+  store i32 %0, ptr %2, align 4
+  %3 = load i32, ptr %2, align 4
+  %4 = call i32 @foo(i32 noundef %3)
+  ret i32 %4
+}
+
+declare !type !8 !type !9 i32 @foo(i32 noundef) #1
+
+define i32 @test(i32 noundef %0) #0 !type !8 !type !9 {
+  %2 = alloca i32, align 4
+  %3 = alloca ptr, align 8
+  store i32 %0, ptr %2, align 4
+  %4 = load i32, ptr %2, align 4
+  %5 = icmp sgt i32 %4, 0
+  %6 = zext i1 %5 to i64
+  %7 = select i1 %5, ptr @foo, ptr @bar
+  store ptr %7, ptr %3, align 8
+  %8 = load ptr, ptr %3, align 8
+  %9 = call i1 @llvm.type.test(ptr %8, metadata !"_ZTSFiiE"), !nosanitize !10
+  br i1 %9, label %11, label %10, !nosanitize !10
+
+10:
+  call void @llvm.ubsantrap(i8 2) #4, !nosanitize !10
+  unreachable, !nosanitize !10
+
+11:
+  %12 = load i32, ptr %2, align 4
+  %13 = call i32 %8(i32 noundef %12)
+  ret i32 %13
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.ubsantrap(i8 immarg)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
+attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a,-fmv" }
+attributes #4 = { noreturn nounwind }
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 0}
+!2 = !{i32 8, !"PIC Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 1}
+!5 = !{i32 1, !"ThinLTO", i32 0}
+!6 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!8 = !{i64 0, !"_ZTSFiiE"}
+!9 = !{i64 0, !"_ZTSFiiE.generalized"}
+!10 = !{}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
new file mode 100644
index 00000000000000..ba406c8f20bb08
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 < %s | FileCheck %s
+
+define void @test(double %i) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x double> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[I]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP3]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP4]], i32 7
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    br label [[BB116:%.*]]
+; CHECK:       bb116:
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[TMP15]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x double> [[TMP15]], i32 1
+; CHECK-NEXT:    [[I120:%.*]] = fadd double [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[TMP18]], i32 1
+; CHECK-NEXT:    [[I128:%.*]] = fadd double [[TMP20]], [[TMP21]]
+; CHECK-NEXT:    [[I139:%.*]] = call double @llvm.maxnum.f64(double [[I128]], double 0.000000e+00)
+; CHECK-NEXT:    [[TMP22:%.*]] = fadd <2 x double> [[TMP19]], zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP22]], <2 x double> zeroinitializer)
+; CHECK-NEXT:    [[TMP24:%.*]] = fmul <2 x double> [[TMP23]], zeroinitializer
+; CHECK-NEXT:    [[TMP25:%.*]] = fptosi <2 x double> [[TMP24]] to <2 x i32>
+; CHECK-NEXT:    [[TMP26:%.*]] = sub <2 x i32> zeroinitializer, [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp sgt <2 x i32> [[TMP26]], zeroinitializer
+; CHECK-NEXT:    [[I147:%.*]] = fcmp ogt double [[I120]], 0.000000e+00
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i74 = fsub double 0.000000e+00, poison
+  %i75 = fsub double 0.000000e+00, %i
+  %i76 = fmul double 0.000000e+00, %i75
+  %i77 = fadd double %i76, 0.000000e+00
+  %i78 = fadd double %i77, 0.000000e+00
+  %i79 = fcmp ult double %i78, 0.000000e+00
+  %i81 = fsub double %i, 0.000000e+00
+  %i82 = fmul double 0.000000e+00, %i81
+  %i83 = fadd double 0.000000e+00, %i82
+  %i84 = fadd double %i83, 0.000000e+00
+  %i85 = fcmp ult double %i84, 0.000000e+00
+  %i86 = fsub double 0.000000e+00, %i
+  %i87 = fmul double 0.000000e+00, %i86
+  %i88 = fadd double %i87, 0.000000e+00
+  %i89 = fadd double %i88, 0.000000e+00
+  %i90 = fcmp ult double %i89, 0.000000e+00
+  %i91 = fsub double 0.000000e+00, 0.000000e+00
+  %i92 = fmul double 0.000000e+00, 0.000000e+00
+  %i93 = fadd double %i92, 0.000000e+00
+  %i94 = fadd double %i93, 0.000000e+00
+  %i95 = fcmp ult double %i94, 0.000000e+00
+  %i96 = fsub double poison, 0.000000e+00
+  %i97 = fadd double %i77, 0.000000e+00
+  %i98 = fcmp ult double %i97, 0.000000e+00
+  %i99 = fadd double %i83, 0.000000e+00
+  %i100 = fcmp ult double %i99, 0.000000e+00
+  %i101 = fmul double 0.000000e+00, 0.000000e+00
+  %i102 = fadd double %i101, 0.000000e+00
+  %i103 = fadd double %i102, 0.000000e+00
+  %i104 = fcmp ult double %i103, 0.000000e+00
+  %i105 = fmul double 0.000000e+00, 0.000000e+00
+  %i106 = fadd double %i105, 0.000000e+00
+  %i107 = fadd double %i106, 0.000000e+00
+  %i108 = fcmp ult double %i107, 0.000000e+00
+  br label %bb116
+
+bb116:
+  %i117 = fmul double 0.000000e+00, %i81
+  %i119 = fmul double 0.000000e+00, %i96
+  %i120 = fadd double %i117, %i119
+  %i121 = fmul double 0.000000e+00, %i74
+  %i122 = fmul double 0.000000e+00, %i75
+  %i123 = fadd double %i122, 0.000000e+00
+  %i124 = fmul double 0.000000e+00, %i91
+  %i125 = fadd double %i124, 0.000000e+00
+  %i127 = fmul double 0.000000e+00, %i86
+  %i128 = fadd double %i127, %i121
+  %i133 = call double @llvm.maxnum.f64(double %i123, double 0.000000e+00)
+  %i134 = fmul double %i133, 0.000000e+00
+  %i135 = fptosi double %i134 to i32
+  %i136 = sub i32 0, %i135
+  %i137 = icmp sgt i32 %i136, 0
+  %i139 = call double @llvm.maxnum.f64(double %i128, double 0.000000e+00)
+  %i142 = call double @llvm.maxnum.f64(double %i125, double 0.000000e+00)
+  %i143 = fmul double %i142, 0.000000e+00
+  %i144 = fptosi double %i143 to i32
+  %i145 = sub i32 0, %i144
+  %i146 = icmp sgt i32 %i145, 0
+  %i147 = fcmp ogt double %i120, 0.000000e+00
+  ret void
+}
+
+declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
new file mode 100644
index 00000000000000..f665dac3282b79
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 -S < %s | FileCheck %s
+
+define void @foo(double %i) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double 0.000000e+00>, double [[I]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> <i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> <double 0.000000e+00, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 8, i32 poison, i32 2, i32 poison, i32 poison, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x double> [[TMP7]], double [[TMP2]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> <double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 5, i32 poison, i32 7>
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP5]], i32 6
+; CHECK-NEXT:    [[TMP12:%.*]] = fmul <8 x double> [[TMP8]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = fcmp ult <8 x double> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = freeze <8 x i1> [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP16]])
+; CHECK-NEXT:    br i1 [[TMP17]], label [[BB58:%.*]], label [[BB115:%.*]]
+; CHECK:       bb115:
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1
+; CHECK-NEXT:    [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, <4 x double> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT:    [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
+; CHECK-NEXT:    [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer
+; CHECK-NEXT:    [[TMP28:%.*]] = fmul <4 x double> [[TMP27]], zeroinitializer
+; CHECK-NEXT:    [[TMP29:%.*]] = fptosi <4 x double> [[TMP28]] to <4 x i32>
+; CHECK-NEXT:    [[TMP30:%.*]] = or <4 x i32> zeroinitializer, [[TMP29]]
+; CHECK-NEXT:    [[TMP31:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP30]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp slt i32 [[TMP31]], 32000
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP31]], i32 32000
+; CHECK-NEXT:    [[I163:%.*]] = fcmp ogt double [[I118]], 0.000000e+00
+; CHECK-NEXT:    [[I164:%.*]] = icmp slt i32 0, [[OP_RDX1]]
+; CHECK-NEXT:    unreachable
+; CHECK:       bb58:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i75 = fsub double 0.000000e+00, 0.000000e+00
+  %i76 = fsub double 0.000000e+00, 0.000000e+00
+  %i77 = fmul double 0.000000e+00, %i75
+  %i78 = fmul double 0.000000e+00, %i76
+  %i79 = fadd double %i78, 0.000000e+00
+  %i80 = fadd double %i79, 0.000000e+00
+  %i81 = fcmp ult double %i80, 0.000000e+00
+  %i82 = fsub double 0.000000e+00, poison
+  %i83 = fmul double 0.000000e+00, %i82
+  %i84 = fadd double 0.000000e+00, %i83
+  %i85 = fadd double %i84, 0.000000e+00
+  %i86 = fcmp ult double %i85, 0.000000e+00
+  %i87 = fsub double 0.000000e+00, %i
+  %i88 = fadd double 0.000000e+00, %i77
+  %i89 = fadd double %i88, 0.000000e+00
+  %i90 = fcmp ult double %i89, 0.000000e+00
+  %i91 = fsub double 0.000000e+00, 0.000000e+00
+  %i92 = fmul double poison, 0.000000e+00
+  %i93 = fadd double %i92, 0.000000e+00
+  %i94 = fadd double %i93, 0.000000e+00
+  %i95 = fcmp ult double %i94, 0.000000e+00
+  %i96 = fadd double %i79, 0.000000e+00
+  %i97 = fcmp ult double %i96, 0.000000e+00
+  %i98 = fadd double %i84, 0.000000e+00
+  %i99 = fcmp ult double %i98, 0.000000e+00
+  %i100 = fadd double 0.000000e+00, %i77
+  %i101 = fadd double %i100, 0.000000e+00
+  %i102 = fcmp ult double %i101, 0.000000e+00
+  %i103 = fsub double 0.000000e+00, %i
+  %i104 = fmul double poison, 0.000000e+00
+  %i105 = fadd double %i104, 0.000000e+00
+  %i106 = fadd double %i105, 0.000000e+00
+  %i107 = fcmp ult double %i106, 0.000000e+00
+  %i108 = select i1 %i107, i1 %i102, i1 false
+  %i109 = select i1 %i108, i1 %i99, i1 false
+  %i110 = select i1 %i109, i1 %i97, i1 false
+  %i111 = select i1 %i110, i1 %i95, i1 false
+  %i112 = select i1 %i111, i1 %i90, i1 false
+  %i113 = select i1 %i112, i1 %i86, i1 false
+  %i114 = select i1 %i113, i1 %i81, i1 false
+  br i1 %i114, label %bb58, label %bb115
+
+bb115:
+  %i116 = fmul double 0.000000e+00, %i103
+  %i117 = fmul double 0.000000e+00, %i82
+  %i118 = fadd double %i116, %i117
+  %i120 = fmul double 0.000000e+00, %i75
+  %i121 = fmul double 0.000000e+00, %i76
+  %i122 = fadd double %i121, 0.000000e+00
+  %i123 = fadd double 0.000000e+00, %i120
+  %i124 = fmul double 0.000000e+00, %i91
+  %i125 = fadd double %i124, %i82
+  %i126 = fadd double %i125, 0.000000e+00
+  %i127 = fmul double 0.000000e+00, %i87
+  %i128 = fadd double %i127, 0.000000e+00
+  %i129 = fadd double %i128, 0.000000e+00
+  %i130 = fadd double %i122, 0.000000e+00
+  %i131 = fadd double %i123, 0.000000e+00
+  %i132 = select i1 false, double 0.000000e+00, double %i131
+  %i133 = fmul double %i132, 0.000000e+00
+  %i134 = fmul double %i133, 0.000000e+00
+  %i135 = fptosi double %i134 to i32
+  %i136 = or i32 0, %i135
+  %i137 = icmp slt i32 %i136, 32000
+  %i138 = select i1 %i137, i32 %i136, i32 32000
+  %i139 = select i1 false, double 0.000000e+00, double %i130
+  %i140 = fmul double %i139, 0.000000e+00
+  %i141 = fmul double %i140, 0.000000e+00
+  %i142 = fptosi double %i141 to i32
+  %i143 = or i32 0, %i142
+  %i144 = icmp slt i32 %i143, %i138
+  %i145 = select i1 %i144, i32 %i143, i32 %i138
+  %i146 = select i1 false, double 0.000000e+00, double %i129
+  %i147 = fmul double %i146, 0.000000e+00
+  %i148 = fmul double %i147, 0.000000e+00
+  %i149 = fptosi double %i148 to i32
+  %i150 = or i32 0, %i149
+  %i151 = icmp slt i32 %i150, %i145
+  %i152 = select i1 %i151, i32 %i150, i32 %i145
+  %i153 = select i1 false, double 0.000000e+00, double %i126
+  %i154 = fmul double %i153, 0.000000e+00
+  %i155 = fmul double %i154, 0.000000e+00
+  %i156 = fptosi double %i155 to i32
+  %i157 = or i32 0, %i156
+  %i158 = icmp slt i32 %i157, %i152
+  %i159 = select i1 %i158, i32 %i157, i32 %i152
+  %i163 = fcmp ogt double %i118, 0.000000e+00
+  %i164 = icmp slt i32 0, %i159
+  unreachable
+
+bb58:
+  ret void
+}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index c6f9ee82e08cc1..74e7769a480598 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -414,10 +414,11 @@ def version_int(ver):
     config.available_features.add("llvm-dylib")
     config.substitutions.append(
         (
+            # libLLVM.so.19.0git
             "%llvmdylib",
-            "{}/libLLVM-{}{}".format(
-                config.llvm_shlib_dir, config.llvm_dylib_version, config.llvm_shlib_ext
-            ),
+            "{}/libLLVM{}.{}".format(
+                config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version
+            )
         )
     )
 
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 1138b2ccf7bce7..b6f255d472d16f 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -44,7 +44,7 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@
 config.enable_threads = @LLVM_ENABLE_THREADS@
 config.build_shared_libs = @BUILD_SHARED_LIBS@
 config.link_llvm_dylib = @LLVM_LINK_LLVM_DYLIB@
-config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@@LLVM_VERSION_SUFFIX@"
+config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@@LLVM_VERSION_SUFFIX@"
 config.llvm_host_triple = '@LLVM_HOST_TRIPLE@'
 config.host_arch = "@HOST_ARCH@"
 config.have_opt_viewer_modules = @LLVM_HAVE_OPT_VIEWER_MODULES@
diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test
index fc5856691f8dca..f88b7575002a94 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/binary-output-target.test
@@ -48,6 +48,9 @@
 # RUN: llvm-objcopy -I binary -O elf64-loongarch %t.txt %t.la64.o
 # RUN: llvm-readobj --file-headers %t.la64.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64
 
+# RUN: llvm-objcopy -I binary -O elf64-s390 %t.txt %t.s390x.o
+# RUN: llvm-readobj --file-headers %t.s390x.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64
+
 # CHECK: Format:
 # 32-SAME:      elf32-
 # 64-SAME:      elf64-
@@ -64,6 +67,7 @@
 # PPCLE-SAME:   powerpcle{{$}}
 # SPARC-SAME:   sparc
 # SPARCEL-SAME: sparc
+# S390X-SAME:   s390
 # X86-64-SAME:  x86-64
 
 # AARCH64-NEXT: Arch: aarch64
@@ -81,6 +85,7 @@
 # RISCV64-NEXT: Arch: riscv64
 # SPARC-NEXT:   Arch: sparc{{$}}
 # SPARCEL-NEXT: Arch: sparcel
+# S390X-NEXT:   Arch: s390x
 # X86-64-NEXT:  Arch: x86_64
 
 # 32-NEXT:      AddressSize: 32bit
@@ -116,6 +121,7 @@
 # RISCV64-NEXT:   Machine: EM_RISCV (0xF3)
 # SPARC-NEXT:     Machine: EM_SPARC (0x2)
 # SPARCEL-NEXT:   Machine: EM_SPARC (0x2)
+# S390X-NEXT:     Machine: EM_S390 (0x16)
 # X86-64-NEXT:    Machine: EM_X86_64 (0x3E)
 
 # CHECK-NEXT:     Version: 1
diff --git a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test
index 882940c05e19c2..9a8128611792d5 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/cross-arch-headers.test
@@ -117,6 +117,10 @@
 # RUN: llvm-readobj --file-headers %t.elf64_loongarch.o | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV
 # RUN: llvm-readobj --file-headers %t.elf64_loongarch.dwo | FileCheck %s --check-prefixes=CHECK,LE,LA64,64,SYSV
 
+# RUN: llvm-objcopy %t.o -O elf64-s390 %t.elf64_s390.o --split-dwo=%t.elf64_s390.dwo
+# RUN: llvm-readobj --file-headers %t.elf64_s390.o | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV
+# RUN: llvm-readobj --file-headers %t.elf64_s390.dwo | FileCheck %s --check-prefixes=CHECK,BE,S390X,64,SYSV
+
 !ELF
 FileHeader:
   Class:           ELFCLASS32
@@ -160,6 +164,7 @@ Symbols:
 # RISCV32-SAME: riscv{{$}}
 # RISCV64-SAME: riscv{{$}}
 # SPARC-SAME:   sparc
+# S390X-SAME:   s390
 # X86-64-SAME:  x86-64
 # DEFAULT-SAME: unknown
 
@@ -182,6 +187,7 @@ Symbols:
 # RISCV64-NEXT:  Arch: riscv64
 # SPARC-NEXT:    Arch: sparc{{$}}
 # SPARCEL-NEXT:  Arch: sparcel
+# S390X-NEXT:    Arch: s390x
 # X86-64-NEXT:   Arch: x86_64
 # DEFAULT-NEXT:  Arch: unknown
 
@@ -210,6 +216,7 @@ Symbols:
 # RISCV32: Machine: EM_RISCV (0xF3)
 # RISCV64: Machine: EM_RISCV (0xF3)
 # SPARC:   Machine: EM_SPARC (0x2)
+# S390X:   Machine: EM_S390 (0x16)
 # X86-64:  Machine: EM_X86_64 (0x3E)
 
 # 32: HeaderSize: 52
diff --git a/llvm/test/tools/llvm-objdump/openbsd-headers.test b/llvm/test/tools/llvm-objdump/openbsd-headers.test
index f547854feeeedf..84fa59bdf89f5c 100644
--- a/llvm/test/tools/llvm-objdump/openbsd-headers.test
+++ b/llvm/test/tools/llvm-objdump/openbsd-headers.test
@@ -11,6 +11,8 @@
 # CHECK-NEXT:      filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
 # CHECK-NEXT: OPENBSD_NOBTCFI off    0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0
 # CHECK-NEXT:      filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
+# CHECK-NEXT: OPENBSD_SYSCALLS off    0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0
+# CHECK-NEXT:      filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
 # CHECK-NEXT: OPENBSD_BOOTDATA off    0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**0
 # CHECK-NEXT:      filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
 
@@ -25,4 +27,5 @@ ProgramHeaders:
   - Type: 0x65a3dbe6 ## PT_OPENBSD_RANDOMIZE
   - Type: 0x65a3dbe7 ## PT_OPENBSD_WXNEEDED
   - Type: 0x65a3dbe8 ## PT_OPENBSD_NOBTCFI
+  - Type: 0x65a3dbe9 ## PT_OPENBSD_SYSCALLS
   - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA
diff --git a/llvm/test/tools/llvm-readobj/ELF/program-headers.test b/llvm/test/tools/llvm-readobj/ELF/program-headers.test
index 702a06b6403f0a..856cf378ddad95 100644
--- a/llvm/test/tools/llvm-readobj/ELF/program-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/program-headers.test
@@ -29,68 +29,70 @@
 # RUN:   FileCheck %s --check-prefixes=ELF64,MAPPING --strict-whitespace --match-full-lines
 # RUN: llvm-readobj -l %t64.elf | FileCheck %s --check-prefixes=ELF-LLVM,ELF64-LLVM
 
-#       ELF32:There are 25 program headers, starting at offset 52
+#       ELF32:There are 26 program headers, starting at offset 52
 # ELF32-EMPTY:
 # ELF32-NEXT:Program Headers:
 # ELF32-NEXT:  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
-# ELF32-NEXT:  PHDR           0x000354 0x00001000 0x00001000 0x00003 0x00003  W  0x1
-# ELF32-NEXT:  PHDR           0x000357 0x00002000 0x00002000 0x00007 0x00007   E 0x1
-# ELF32-NEXT:  NULL           0x000357 0x00002000 0x00002000 0x00007 0x00007   E 0x1
-# ELF32-NEXT:  DYNAMIC        0x000354 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1
-# ELF32-NEXT:  INTERP         0x00035e 0x00003000 0x00003000 0x00004 0x00004 RW  0x1
+# ELF32-NEXT:  PHDR           0x000374 0x00001000 0x00001000 0x00003 0x00003  W  0x1
+# ELF32-NEXT:  PHDR           0x000377 0x00002000 0x00002000 0x00007 0x00007   E 0x1
+# ELF32-NEXT:  NULL           0x000377 0x00002000 0x00002000 0x00007 0x00007   E 0x1
+# ELF32-NEXT:  DYNAMIC        0x000374 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1
+# ELF32-NEXT:  INTERP         0x00037e 0x00003000 0x00003000 0x00004 0x00004 RW  0x1
 # ELF32-NEXT:      [Requesting program interpreter: ABC]
-# ELF32-NEXT:  NOTE           0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  SHLIB          0x000354 0x00001000 0x00001000 0x00001 0x00001     0x1
-# ELF32-NEXT:  TLS            0x000362 0x00004000 0x00004000 0x00001 0x00001     0x1
-# ELF32-NEXT:  <unknown>: 0x60000000 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  GNU_EH_FRAME   0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  SUNW_UNWIND    0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  GNU_STACK      0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  GNU_RELRO      0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  GNU_PROPERTY   0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  OPENBSD_MUTABLE 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  OPENBSD_RANDOMIZE 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  OPENBSD_WXNEEDED 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  OPENBSD_NOBTCFI 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  OPENBSD_BOOTDATA 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x6fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x70000000 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x70000001 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x70000002 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x70000003 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
-# ELF32-NEXT:  <unknown>: 0x7fffffff 0x000354 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  NOTE           0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  SHLIB          0x000374 0x00001000 0x00001000 0x00001 0x00001     0x1
+# ELF32-NEXT:  TLS            0x000382 0x00004000 0x00004000 0x00001 0x00001     0x1
+# ELF32-NEXT:  <unknown>: 0x60000000 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  GNU_EH_FRAME   0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  SUNW_UNWIND    0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  GNU_STACK      0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  GNU_RELRO      0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  GNU_PROPERTY   0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_MUTABLE 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_RANDOMIZE 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_WXNEEDED 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_NOBTCFI 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_SYSCALLS 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  OPENBSD_BOOTDATA 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x6fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x70000000 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x70000001 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x70000002 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x70000003 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
+# ELF32-NEXT:  <unknown>: 0x7fffffff 0x000374 0x00001000 0x00001000 0x00003 0x00003     0x1
 # ELF32-EMPTY:
 
-#       ELF64:There are 25 program headers, starting at offset 64
+#       ELF64:There are 26 program headers, starting at offset 64
 # ELF64-EMPTY:
 # ELF64-NEXT:Program Headers:
 # ELF64-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
-# ELF64-NEXT:  PHDR           0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003  W  0x1
-# ELF64-NEXT:  PHDR           0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007   E 0x1
-# ELF64-NEXT:  NULL           0x0005bb 0x0000000000002000 0x0000000000002000 0x000007 0x000007   E 0x1
-# ELF64-NEXT:  DYNAMIC        0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1
-# ELF64-NEXT:  INTERP         0x0005c2 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW  0x1
+# ELF64-NEXT:  PHDR           0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003  W  0x1
+# ELF64-NEXT:  PHDR           0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007   E 0x1
+# ELF64-NEXT:  NULL           0x0005f3 0x0000000000002000 0x0000000000002000 0x000007 0x000007   E 0x1
+# ELF64-NEXT:  DYNAMIC        0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1
+# ELF64-NEXT:  INTERP         0x0005fa 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW  0x1
 # ELF64-NEXT:      [Requesting program interpreter: ABC]
-# ELF64-NEXT:  NOTE           0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  SHLIB          0x0005b8 0x0000000000001000 0x0000000000001000 0x000001 0x000001     0x1
-# ELF64-NEXT:  TLS            0x0005c6 0x0000000000004000 0x0000000000004000 0x000001 0x000001     0x1
-# ELF64-NEXT:  <unknown>: 0x60000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  GNU_EH_FRAME   0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  SUNW_UNWIND    0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  GNU_STACK      0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  GNU_RELRO      0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  GNU_PROPERTY   0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  OPENBSD_MUTABLE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  OPENBSD_RANDOMIZE 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  OPENBSD_WXNEEDED 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  OPENBSD_NOBTCFI 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  OPENBSD_BOOTDATA 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x6fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x70000001 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x70000003 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
-# ELF64-NEXT:  <unknown>: 0x7fffffff 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  NOTE           0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  SHLIB          0x0005f0 0x0000000000001000 0x0000000000001000 0x000001 0x000001     0x1
+# ELF64-NEXT:  TLS            0x0005fe 0x0000000000004000 0x0000000000004000 0x000001 0x000001     0x1
+# ELF64-NEXT:  <unknown>: 0x60000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  GNU_EH_FRAME   0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  SUNW_UNWIND    0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  GNU_STACK      0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  GNU_RELRO      0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  GNU_PROPERTY   0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_MUTABLE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_RANDOMIZE 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_WXNEEDED 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_NOBTCFI 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_SYSCALLS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  OPENBSD_BOOTDATA 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x6fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x70000001 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x70000003 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
+# ELF64-NEXT:  <unknown>: 0x7fffffff 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003     0x1
 # ELF64-EMPTY:
 
 #      MAPPING: Section to Segment mapping:
@@ -120,13 +122,14 @@
 # MAPPING-NEXT:   22     .foo.begin .foo.end {{$}}
 # MAPPING-NEXT:   23     .foo.begin .foo.end {{$}}
 # MAPPING-NEXT:   24     .foo.begin .foo.end {{$}}
+# MAPPING-NEXT:   25     .foo.begin .foo.end {{$}}
 # MAPPING-NEXT:   None   .unused .strtab .shstrtab {{$}}
 
 # ELF-LLVM:      ProgramHeaders [
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_PHDR (0x6)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -138,8 +141,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_PHDR (0x6)
-# ELF32-LLVM-NEXT:   Offset: 0x357
-# ELF64-LLVM-NEXT:   Offset: 0x5BB
+# ELF32-LLVM-NEXT:   Offset: 0x377
+# ELF64-LLVM-NEXT:   Offset: 0x5F3
 # ELF-LLVM-NEXT:     VirtualAddress: 0x2000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x2000
 # ELF-LLVM-NEXT:     FileSize: 7
@@ -151,8 +154,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_NULL (0x0)
-# ELF32-LLVM-NEXT:   Offset: 0x357
-# ELF64-LLVM-NEXT:   Offset: 0x5BB
+# ELF32-LLVM-NEXT:   Offset: 0x377
+# ELF64-LLVM-NEXT:   Offset: 0x5F3
 # ELF-LLVM-NEXT:     VirtualAddress: 0x2000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x2000
 # ELF-LLVM-NEXT:     FileSize: 7
@@ -164,8 +167,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_DYNAMIC (0x2)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -179,8 +182,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_INTERP (0x3)
-# ELF32-LLVM-NEXT:   Offset: 0x35E
-# ELF64-LLVM-NEXT:   Offset: 0x5C2
+# ELF32-LLVM-NEXT:   Offset: 0x37E
+# ELF64-LLVM-NEXT:   Offset: 0x5FA
 # ELF-LLVM-NEXT:     VirtualAddress: 0x3000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x3000
 # ELF-LLVM-NEXT:     FileSize: 4
@@ -193,8 +196,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_NOTE (0x4)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -205,8 +208,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_SHLIB (0x5)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 1
@@ -217,8 +220,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_TLS (0x7)
-# ELF32-LLVM-NEXT:   Offset: 0x362
-# ELF64-LLVM-NEXT:   Offset: 0x5C6
+# ELF32-LLVM-NEXT:   Offset: 0x382
+# ELF64-LLVM-NEXT:   Offset: 0x5FE
 # ELF-LLVM-NEXT:     VirtualAddress: 0x4000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x4000
 # ELF-LLVM-NEXT:     FileSize: 1
@@ -229,8 +232,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x60000000)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -241,8 +244,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_GNU_EH_FRAME (0x6474E550)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -253,8 +256,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_SUNW_UNWIND (0x6464E550)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -265,8 +268,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_GNU_STACK (0x6474E551)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -277,8 +280,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_GNU_RELRO (0x6474E552)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -289,8 +292,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_GNU_PROPERTY (0x6474E553)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -301,8 +304,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_OPENBSD_MUTABLE (0x65A3DBE5)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -313,8 +316,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_OPENBSD_RANDOMIZE (0x65A3DBE6)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -325,8 +328,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_OPENBSD_WXNEEDED (0x65A3DBE7)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -337,8 +340,20 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_OPENBSD_NOBTCFI (0x65A3DBE8)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
+# ELF-LLVM-NEXT:     VirtualAddress: 0x1000
+# ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
+# ELF-LLVM-NEXT:     FileSize: 3
+# ELF-LLVM-NEXT:     MemSize: 3
+# ELF-LLVM-NEXT:     Flags [ (0x0)
+# ELF-LLVM-NEXT:     ]
+# ELF-LLVM-NEXT:     Alignment: 1
+# ELF-LLVM-NEXT:   }
+# ELF-LLVM-NEXT:   ProgramHeader {
+# ELF-LLVM-NEXT:     Type: PT_OPENBSD_SYSCALLS (0x65A3DBE9)
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -349,8 +364,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: PT_OPENBSD_BOOTDATA (0x65A41BE6)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -361,8 +376,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x6FFFFFFF)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -373,8 +388,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x70000000)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -385,8 +400,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x70000001)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -397,8 +412,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x70000002)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -409,8 +424,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x70000003)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -421,8 +436,8 @@
 # ELF-LLVM-NEXT:   }
 # ELF-LLVM-NEXT:   ProgramHeader {
 # ELF-LLVM-NEXT:     Type: Unknown (0x7FFFFFFF)
-# ELF32-LLVM-NEXT:   Offset: 0x354
-# ELF64-LLVM-NEXT:   Offset: 0x5B8
+# ELF32-LLVM-NEXT:   Offset: 0x374
+# ELF64-LLVM-NEXT:   Offset: 0x5F0
 # ELF-LLVM-NEXT:     VirtualAddress: 0x1000
 # ELF-LLVM-NEXT:     PhysicalAddress: 0x1000
 # ELF-LLVM-NEXT:     FileSize: 3
@@ -569,37 +584,42 @@ ProgramHeaders:
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 19: the PT_OPENBSD_BOOTDATA segment.
+## Case 19: the PT_OPENBSD_SYSCALLS segment.
+  - Type:     0x65a3dbe9 ## PT_OPENBSD_SYSCALLS
+    VAddr:    0x1000
+    FirstSec: .foo.begin
+    LastSec:  .foo.end
+## Case 20: the PT_OPENBSD_BOOTDATA segment.
   - Type:     0x65a41be6 ## PT_OPENBSD_BOOTDATA
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 20: the PT_HIOS segment.
+## Case 21: the PT_HIOS segment.
   - Type:     0x6fffffff ## PT_HIOS
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 21: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment.
+## Case 22: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment.
   - Type:     0x70000000 ## PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 22: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment.
+## Case 23: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment.
   - Type:     0x70000001 ## PT_ARM_EXIDX, PT_MIPS_RTPROC
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 23: the PT_MIPS_OPTIONS segment.
+## Case 24: the PT_MIPS_OPTIONS segment.
   - Type:     0x70000002 ## PT_MIPS_OPTIONS
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 24: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment.
+## Case 25: the PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES segment.
   - Type:     0x70000003 ## PT_MIPS_ABIFLAGS/PT_RISCV_ATTRIBUTES
     VAddr:    0x1000
     FirstSec: .foo.begin
     LastSec:  .foo.end
-## Case 25: the PT_HIPROC segment.
+## Case 26: the PT_HIPROC segment.
   - Type:     0x7fffffff ## PT_HIPROC
     VAddr:    0x1000
     FirstSec: .foo.begin
@@ -610,9 +630,9 @@ ProgramHeaders:
 # RUN: llvm-readelf --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-GNU
 # RUN: llvm-readobj --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM-LLVM
 
-# ARM-GNU:      <unknown>: 0x70000000 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
-# ARM-GNU-NEXT:            EXIDX      0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
-# ARM-GNU-NEXT: <unknown>: 0x70000002 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ARM-GNU:      <unknown>: 0x70000000 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ARM-GNU-NEXT:            EXIDX      0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ARM-GNU-NEXT: <unknown>: 0x70000002 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
 
 # ARM-LLVM:      ProgramHeader {
 # ARM-LLVM:        Type: Unknown (0x70000000)
@@ -626,10 +646,10 @@ ProgramHeaders:
 # RUN: llvm-readelf --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-GNU
 # RUN: llvm-readobj --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS-LLVM
 
-# MIPS-GNU:      REGINFO  0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
-# MIPS-GNU-NEXT: RTPROC   0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
-# MIPS-GNU-NEXT: OPTIONS  0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
-# MIPS-GNU-NEXT: ABIFLAGS 0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-GNU:      REGINFO  0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-GNU-NEXT: RTPROC   0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-GNU-NEXT: OPTIONS  0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-GNU-NEXT: ABIFLAGS 0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
 
 # MIPS-LLVM:      ProgramHeader {
 # MIPS-LLVM:        Type: PT_MIPS_REGINFO (0x70000000)
@@ -645,7 +665,7 @@ ProgramHeaders:
 # RUN: llvm-readelf --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-GNU
 # RUN: llvm-readobj --program-headers %triscv.elf | FileCheck %s --check-prefix=RISCV-LLVM
 
-# RISCV-GNU:       ATTRIBUTES  0x0005b8 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# RISCV-GNU:       ATTRIBUTES  0x0005f0 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
 # RISCV-LLVM:      ProgramHeader {
 # RISCV-LLVM:        Type: PT_RISCV_ATTRIBUTES (0x70000003)
 
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index f15307181fad61..f63e5c61e802c8 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -299,6 +299,8 @@ static const StringMap<MachineInfo> TargetMap{
     // LoongArch
     {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}},
     {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}},
+    // SystemZ
+    {"elf64-s390", {ELF::EM_S390, true, false}},
 };
 
 static Expected<TargetInfo>
diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp
index 34861ee92128fd..fda99bd6d33e17 100644
--- a/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -291,6 +291,9 @@ template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
     case ELF::PT_OPENBSD_RANDOMIZE:
       outs() << "OPENBSD_RANDOMIZE ";
       break;
+    case ELF::PT_OPENBSD_SYSCALLS:
+      outs() << "OPENBSD_SYSCALLS ";
+      break;
     case ELF::PT_OPENBSD_WXNEEDED:
       outs() << "OPENBSD_WXNEEDED ";
       break;
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f369a63add1149..387124ad53e408 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1478,6 +1478,7 @@ static StringRef segmentTypeToString(unsigned Arch, unsigned Type) {
     LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_NOBTCFI);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_SYSCALLS);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
   default:
     return "";
diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt
index a47a0ec84c625c..09c15e304614c4 100644
--- a/llvm/tools/llvm-shlib/CMakeLists.txt
+++ b/llvm/tools/llvm-shlib/CMakeLists.txt
@@ -33,7 +33,10 @@ if(LLVM_BUILD_LLVM_DYLIB)
   if (LLVM_LINK_LLVM_DYLIB)
     set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN)
   endif()
-  add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB SONAME ${INSTALL_WITH_TOOLCHAIN} ${SOURCES})
+  add_llvm_library(LLVM SHARED DISABLE_LLVM_LINK_LLVM_DYLIB OUTPUT_NAME LLVM ${INSTALL_WITH_TOOLCHAIN} ${SOURCES})
+  # Add symlink for backwards compatibility with old library name
+  get_target_property(LLVM_DYLIB_FILENAME LLVM OUTPUT_NAME)
+  llvm_install_library_symlink(LLVM-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} ${LLVM_DYLIB_FILENAME} SHARED COMPONENT LLVM)
 
   list(REMOVE_DUPLICATES LIB_NAMES)
   if("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
diff --git a/llvm/utils/release/github-upload-release.py b/llvm/utils/release/github-upload-release.py
index a8bb569d2fc999..14ec05062d88c8 100755
--- a/llvm/utils/release/github-upload-release.py
+++ b/llvm/utils/release/github-upload-release.py
@@ -77,20 +77,28 @@ def upload_files(repo, release, files):
 parser.add_argument("--token", type=str)
 parser.add_argument("--release", type=str)
 parser.add_argument("--user", type=str)
+parser.add_argument("--user-token", type=str)
 
 # Upload args
 parser.add_argument("--files", nargs="+", type=str)
 
 args = parser.parse_args()
 
-github = github.Github(args.token)
-llvm_org = github.get_organization("llvm")
+gh = github.Github(args.token)
+llvm_org = gh.get_organization("llvm")
 llvm_repo = llvm_org.get_repo("llvm-project")
 
 if args.user:
+    if not args.user_token:
+        print("--user-token option required when --user is used")
+        sys.exit(1)
     # Validate that this user is allowed to modify releases.
-    user = github.get_user(args.user)
-    team = llvm_org.get_team_by_slug("llvm-release-managers")
+    user = gh.get_user(args.user)
+    team = (
+        github.Github(args.user_token)
+        .get_organization("llvm")
+        .get_team_by_slug("llvm-release-managers")
+    )
     if not team.has_in_members(user):
         print("User {} is not a allowed to modify releases".format(args.user))
         sys.exit(1)
diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh
index 5b1945df47d24a..0af16387ce1d8e 100755
--- a/llvm/utils/release/test-release.sh
+++ b/llvm/utils/release/test-release.sh
@@ -537,6 +537,11 @@ function build_llvmCore() {
         InstallTarget="$InstallTarget install-runtimes"
       fi
     fi
+    if [ "$Phase" -eq "3" ]; then
+      # Build everything at once, with the proper parallelism and verbosity,
+      # in Phase 3.
+      BuildTarget=
+    fi
 
     cd $ObjDir
     echo "# Compiling llvm $Release-$RC $Flavor"
diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
new file mode 100644
index 00000000000000..f8d082082117cb
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Decoding the attribute does not work on big-endian platforms currently
+// XFAIL: target=s390x-{{.*}}
+
+// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000]
+llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource<dense_resource_test_5xf32> : tensor<5xf32>) : !llvm.array<5 x f32>
+
+// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> <float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000>
+llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource<dense_resource_test_5xf32> : vector<5xf32>) : vector<5xf32>
+
+
+// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]]
+llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource<dense_resource_test_2x2xf32> : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>>
+
+// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> <float 0x3FD6B46A80000000, float 0x3FD6781AC0000000>, <2 x float> <float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000>]]
+llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource<dense_resource_test_2x2xf32> : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>>
+
+// Resources are kept at end of file. New tests should be added above this.
+{-#
+  dialect_resources: {
+    builtin: {
+      dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E",
+      dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E"
+    }
+  }
+#-}
\ No newline at end of file
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index 448aa3a5d85d79..961c9484446845 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -101,19 +101,6 @@ llvm.mlir.global internal @dense_float_vector_3d(dense<[[[1.0, 2.0], [3.0, 4.0]]
 // CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> <float 4.200000e+01, float 4.200000e+01>], [2 x <2 x float>] [<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> <float 4.200000e+01, float 4.200000e+01>]]
 llvm.mlir.global internal @splat_float_vector_3d(dense<42.0> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>>
 
-// CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000]
-llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource<dense_resource_test_5xf32> : tensor<5xf32>) : !llvm.array<5 x f32>
-
-// CHECK{LITERAL}: @dense_resource_vector_constant = internal constant <5 x float> <float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000>
-llvm.mlir.global internal constant @dense_resource_vector_constant(dense_resource<dense_resource_test_5xf32> : vector<5xf32>) : vector<5xf32>
-
-
-// CHECK{LITERAL}: @dense_resource_multidim_tensor_constant = internal constant [1 x [2 x [2 x float]]] [[2 x [2 x float]] [[2 x float] [float 0x3FD6B46A80000000, float 0x3FD6781AC0000000], [2 x float] [float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000]]]
-llvm.mlir.global internal constant @dense_resource_multidim_tensor_constant(dense_resource<dense_resource_test_2x2xf32> : tensor<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x !llvm.array<2 x f32>>>
-
-// CHECK{LITERAL}: @dense_resource_multidim_vector_constant = internal constant [1 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> <float 0x3FD6B46A80000000, float 0x3FD6781AC0000000>, <2 x float> <float 0xBFB45A2AA0000000, float 0x3FD77A5CA0000000>]]
-llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dense_resource<dense_resource_test_2x2xf32> : vector<1x2x2xf32>) : !llvm.array<1 x !llvm.array<2 x vector<2 x f32>>>
-
 //
 // Linkage attribute.
 //
@@ -1590,16 +1577,6 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali
   llvm.invoke %9(%6, %0) to ^bb2 unwind ^bb1 vararg(!llvm.func<void (ptr, ...)>) : !llvm.ptr, (!llvm.ptr, i32) -> ()
 }
 
-// Resources are kept at end of file. New tests should be added above this.
-{-#
-  dialect_resources: {
-    builtin: {
-      dense_resource_test_5xf32: "0x08000000041A503E183382BEFCEEBABE7A3AF0BE0E9DEE3E",
-      dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E"
-    }
-  }
-#-}
-
 // -----
 
 llvm.func @foo() -> i8
diff --git a/openmp/cmake/HandleOpenMPOptions.cmake b/openmp/cmake/HandleOpenMPOptions.cmake
index 71346201129b68..9387d9b3b0ff75 100644
--- a/openmp/cmake/HandleOpenMPOptions.cmake
+++ b/openmp/cmake/HandleOpenMPOptions.cmake
@@ -46,7 +46,11 @@ append_if(OPENMP_HAVE_WEXTRA_FLAG "-Wno-extra" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 append_if(OPENMP_HAVE_WPEDANTIC_FLAG "-Wno-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 append_if(OPENMP_HAVE_WMAYBE_UNINITIALIZED_FLAG "-Wno-maybe-uninitialized" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
-append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+if (NOT (WIN32 OR CYGWIN))
+  # This flag is not relevant on Windows; the flag is accepted, but produces warnings
+  # about argument unused during compilation.
+  append_if(OPENMP_HAVE_NO_SEMANTIC_INTERPOSITION "-fno-semantic-interposition" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+endif()
 append_if(OPENMP_HAVE_FUNCTION_SECTIONS "-ffunction-section" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 append_if(OPENMP_HAVE_DATA_SECTIONS "-fdata-sections" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index c287a31e0b1b54..259c57b5afbca5 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1181,7 +1181,11 @@ extern void __kmp_init_target_task();
 #define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
 #endif
 
+#if KMP_OS_AIX && KMP_ARCH_PPC
+#define KMP_MAX_STKSIZE 0x10000000 /* 256Mb max size on 32-bit AIX */
+#else
 #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
+#endif
 
 #if KMP_ARCH_X86
 #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
@@ -2494,7 +2498,8 @@ typedef struct kmp_dephash_entry kmp_dephash_entry_t;
 #define KMP_DEP_MTX 0x4
 #define KMP_DEP_SET 0x8
 #define KMP_DEP_ALL 0x80
-// Compiler sends us this info:
+// Compiler sends us this info. Note: some test cases contain an explicit copy
+// of this struct and should be in sync with any changes here.
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 9eeaeb88fb9ec7..878e78b5c7ad2d 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -1533,8 +1533,9 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
   kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
   if (*lk == 0) {
     if (KMP_IS_D_LOCK(lockseq)) {
-      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
-                                  KMP_GET_D_TAG(lockseq));
+      KMP_COMPARE_AND_STORE_ACQ32(
+          (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
+          KMP_GET_D_TAG(lockseq));
     } else {
       __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
     }
diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index 88189659a23416..4dc8a90f83b4ea 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -144,7 +144,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
 
 // Mutual exclusion
 
-// The symbol that icc/ifort generates for unnamed for unnamed critical sections
+// The symbol that icc/ifort generates for unnamed critical sections
 // - .gomp_critical_user_ - is defined using .comm in any objects reference it.
 // We can't reference it directly here in C code, as the symbol contains a ".".
 //
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 85c54f4cdc7e96..0ad14f862bcb9b 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -2689,7 +2689,7 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) {
 // lock word.
 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
                                    kmp_dyna_lockseq_t seq) {
-  TCW_4(*lck, KMP_GET_D_TAG(seq));
+  TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
   KA_TRACE(
       20,
       ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
@@ -3180,8 +3180,8 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
   lck->type = tag;
 
   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-    *((kmp_lock_index_t *)user_lock) = idx
-                                       << 1; // indirect lock word must be even
+    *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
+        idx << 1; // indirect lock word must be even
   } else {
     *((kmp_indirect_lock_t **)user_lock) = lck;
   }
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index f21179b4eb68a1..e2a0cda01a9718 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -50,7 +50,7 @@ typedef struct ident ident_t;
 // recent versions), but we are bounded by the pointer-sized chunks that
 // the Intel compiler allocates.
 
-#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
+#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT)
 #define OMP_LOCK_T_SIZE sizeof(int)
 #define OMP_NEST_LOCK_T_SIZE sizeof(void *)
 #else
@@ -120,8 +120,15 @@ extern void __kmp_validate_locks(void);
 
 struct kmp_base_tas_lock {
   // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
+  // Flip the ordering of the high and low 32-bit member to be consistent
+  // with the memory layout of the address in 64-bit big-endian.
+  kmp_int32 depth_locked; // depth locked, for nested locks only
+  std::atomic<kmp_int32> poll;
+#else
   std::atomic<kmp_int32> poll;
   kmp_int32 depth_locked; // depth locked, for nested locks only
+#endif
 };
 
 typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
@@ -1138,11 +1145,13 @@ extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
 
 // Extracts direct lock tag from a user lock pointer
 #define KMP_EXTRACT_D_TAG(l)                                                   \
-  (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) &                   \
-   -(*((kmp_dyna_lock_t *)(l)) & 1))
+  ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll &                       \
+   ((1 << KMP_LOCK_SHIFT) - 1) &                                               \
+   -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1))
 
 // Extracts indirect lock index from a user lock pointer
-#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
+#define KMP_EXTRACT_I_INDEX(l)                                                 \
+  ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1)
 
 // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
 // *) and op (operation type).
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index d2157b10b7819a..ec86ee07472c1e 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -255,8 +255,13 @@ static void __kmp_stg_parse_bool(char const *name, char const *value,
 // placed here in order to use __kmp_round4k static function
 void __kmp_check_stksize(size_t *val) {
   // if system stack size is too big then limit the size for worker threads
+#if KMP_OS_AIX
+  if (*val > KMP_DEFAULT_STKSIZE * 2) // Use 2 times, 16 is too large for AIX.
+    *val = KMP_DEFAULT_STKSIZE * 2;
+#else
   if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics...
     *val = KMP_DEFAULT_STKSIZE * 16;
+#endif
   if (*val < __kmp_sys_min_stksize)
     *val = __kmp_sys_min_stksize;
   if (*val > KMP_MAX_STKSIZE)
diff --git a/openmp/runtime/src/z_AIX_asm.S b/openmp/runtime/src/z_AIX_asm.S
new file mode 100644
index 00000000000000..d711fcb7a7854f
--- /dev/null
+++ b/openmp/runtime/src/z_AIX_asm.S
@@ -0,0 +1,410 @@
+//  z_AIX_asm.S:  - microtasking routines specifically
+//                  written for Power platforms running AIX OS
+
+//
+////===----------------------------------------------------------------------===//
+////
+//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//// See https://llvm.org/LICENSE.txt for license information.
+//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+////
+////===----------------------------------------------------------------------===//
+//
+
+// -----------------------------------------------------------------------
+// macros
+// -----------------------------------------------------------------------
+
+#include "kmp_config.h"
+
+#if KMP_OS_AIX
+//------------------------------------------------------------------------
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
+//                         int gtid, int tid,
+//                         int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+//                         ,
+//                         void **exit_frame_ptr
+// #endif
+//                       ) {
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+//   (*pkfn)( & gtid, & tid, p_argv[0], ... );
+//
+// // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+//   *exit_frame_ptr = 0;
+// #endif
+//
+//   return 1;
+// }
+//
+// parameters:
+//   r3: pkfn
+//   r4: gtid
+//   r5: tid
+//   r6: argc
+//   r7: p_argv
+//   r8: &exit_frame
+//
+// return:  r3 (always 1/TRUE)
+//
+
+#if KMP_ARCH_PPC64_XCOFF
+
+    .globl  __kmp_invoke_microtask[DS]
+    .globl  .__kmp_invoke_microtask
+    .align  4
+    .csect __kmp_invoke_microtask[DS],3
+    .vbyte  8, .__kmp_invoke_microtask
+    .vbyte  8, TOC[TC0]
+    .vbyte  8, 0
+    .csect .text[PR],2
+    .machine "pwr7"
+.__kmp_invoke_microtask:
+
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for
+// the parameters to the microtask (gtid, tid, argc elements of p_argv),
+// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31.
+// With OMP-T support, we need an additional 8 bytes to save r30 to hold
+// a copy of r8.
+// Stack offsets relative to stack pointer:
+//   r31: -8, r30: -16, gtid: -20, tid: -24
+
+    mflr 0
+    std 31, -8(1)      # Save r31 to the stack
+    std 0, 16(1)       # Save LR to the linkage area
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+    mr 31, 1
+
+// Compute the size of the "argc" portion of the parameter save area.
+// The parameter save area is always at least 64 bytes long (i.e. 8 regs)
+// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
+// to allocate 8*6 bytes, not 8*argc.
+    li 0, 6
+    cmpwi 0, 6, 6
+    iselgt 0, 6, 0     # r0 = (argc > 6)? argc : 6
+    sldi 0, 0, 3       # r0 = 8 * max(argc, 6)
+
+// Compute the size necessary for the local stack frame.
+// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) +
+//      8 (parameter gtid) + 8 (parameter tid)
+    li 12, 88
+    add 12, 0, 12
+    neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes).
+    li 0, -16
+    and 12, 0, 12
+
+// Establish the local stack frame.
+    stdux 1, 1, 12
+
+#if OMPT_SUPPORT
+    std 30, -16(31)    # Save r30 to the stack
+    std 1, 0(8)
+    mr 30, 8
+#endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+    stw 4, -20(31)     # Save gtid to the stack
+    stw 5, -24(31)     # Save tid to the stack
+
+    mr 12, 6           # r12 = argc
+    mr 4, 7            # r4 = p_argv
+
+    cmpwi 0, 12, 1
+    blt 0, .Lcall      # if (argc < 1) goto .Lcall
+
+    ld 5, 0(4)         # r5 = p_argv[0]
+
+    cmpwi 0, 12, 2
+    blt 0, .Lcall      # if (argc < 2) goto .Lcall
+
+    ld 6, 8(4)         # r6 = p_argv[1]
+
+    cmpwi 0, 12, 3
+    blt 0, .Lcall      # if (argc < 3) goto .Lcall
+
+    ld 7, 16(4)        # r7 = p_argv[2]
+
+    cmpwi 0, 12, 4
+    blt 0, .Lcall      # if (argc < 4) goto .Lcall
+
+    ld 8, 24(4)        # r8 = p_argv[3]
+
+    cmpwi 0, 12, 5
+    blt 0, .Lcall      # if (argc < 5) goto .Lcall
+
+    ld 9, 32(4)        # r9 = p_argv[4]
+
+    cmpwi 0, 12, 6
+    blt 0, .Lcall      # if (argc < 6) goto .Lcall
+
+    ld 10, 40(4)       # r10 = p_argv[5]
+
+    cmpwi 0, 12, 7
+    blt 0, .Lcall      # if (argc < 7) goto .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+    addi 12, 12, -6    # argc -= 6
+    mtctr 12
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64.
+    addi 4, 4, 40      # p_argv = p_argv + 5
+                       # (i.e. skip the 5 elements we already processed)
+    addi 12, 1, 104    # r12 = stack offset (112 - 8)
+
+.Lnext:
+    ldu 0, 8(4)
+    stdu 0, 8(12)
+    bdnz .Lnext
+
+.Lcall:
+    std 2, 40(1)     # Save the TOC pointer to the linkage area
+// Load the actual function address from the function descriptor.
+    ld 12, 0(3)      # Function address
+    ld 2, 8(3)       # TOC pointer
+    ld 11, 16(3)     # Environment pointer
+
+    addi 3, 31, -20  # r3 = &gtid
+    addi 4, 31, -24  # r4 = &tid
+
+    mtctr 12         # CTR = function address
+    bctrl            # Branch to CTR
+    ld 2, 40(1)      # Restore TOC pointer from linkage area
+
+#if OMPT_SUPPORT
+    li 3, 0
+    std 3, 0(30)
+#endif
+
+    li 3, 1
+
+#if OMPT_SUPPORT
+    ld 30, -16(31)   # Restore r30 from the saved value on the stack
+#endif
+
+    mr 1, 31
+    ld 31, -8(1)     # Restore r31 from the saved value on the stack
+    ld 0, 16(1)
+    mtlr 0           # Restore LR from the linkage area
+    blr              # Branch to LR
+
+#else  // KMP_ARCH_PPC_XCOFF
+
+    .globl  __kmp_invoke_microtask[DS]
+    .globl  .__kmp_invoke_microtask
+    .align  4
+    .csect __kmp_invoke_microtask[DS],2
+    .vbyte  4, .__kmp_invoke_microtask
+    .vbyte  4, TOC[TC0]
+    .vbyte  4, 0
+    .csect .text[PR],2
+    .machine "pwr7"
+.__kmp_invoke_microtask:
+
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 24
+// bytes under the XCOFF ABI, plus max(32, 8*(2 + argc)) for
+// the parameters to the microtask (gtid, tid, argc elements of p_argv),
+// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31.
+// With OMP-T support, we need an additional 4 bytes to save r30 to hold
+// a copy of r8.
+// Stack offsets relative to stack pointer:
+//   r31: -4, r30: -8, gtid: -12, tid: -16
+
+    mflr 0
+    stw 31, -4(1)      # Save r31 to the stack
+    stw 0, 8(1)        # Save LR to the linkage area
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+    mr 31, 1
+
+// Compute the size of the "argc" portion of the parameter save area.
+// The parameter save area is always at least 32 bytes long (i.e. 8 regs)
+// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
+// to allocate 4*6 bytes, not 4*argc.
+    li 0, 6
+    cmpwi 0, 6, 6
+    iselgt 0, 6, 0     # r0 = (argc > 6)? argc : 6
+    slwi 0, 0, 2       # r0 = 4 * max(argc, 6)
+
+// Compute the size necessary for the local stack frame.
+// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) +
+//      4 (parameter gtid) + 4 (parameter tid)
+    li 12, 56
+    add 12, 0, 12
+    neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes).
+    li 0, -16
+    and 12, 0, 12
+
+// Establish the local stack frame.
+    stwux 1, 1, 12
+
+#if OMPT_SUPPORT
+    stw 30, -8(31)     # Save r30 to the stack
+    stw 1, 0(8)
+    mr 30, 8
+#endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+    stw 4, -12(31)     # Save gtid to the stack
+    stw 5, -16(31)     # Save tid to the stack
+
+    mr 12, 6           # r12 = argc
+    mr 4, 7            # r4 = p_argv
+
+    cmpwi 0, 12, 1
+    blt 0, .Lcall      # if (argc < 1) goto .Lcall
+
+    lwz 5, 0(4)        # r5 = p_argv[0]
+
+    cmpwi 0, 12, 2
+    blt 0, .Lcall      # if (argc < 2) goto .Lcall
+
+    lwz 6, 4(4)        # r6 = p_argv[1]
+
+    cmpwi 0, 12, 3
+    blt 0, .Lcall      # if (argc < 3) goto .Lcall
+
+    lwz 7, 8(4)        # r7 = p_argv[2]
+
+    cmpwi 0, 12, 4
+    blt 0, .Lcall      # if (argc < 4) goto .Lcall
+
+    lwz 8, 12(4)       # r8 = p_argv[3]
+
+    cmpwi 0, 12, 5
+    blt 0, .Lcall      # if (argc < 5) goto .Lcall
+
+    lwz 9, 16(4)       # r9 = p_argv[4]
+
+    cmpwi 0, 12, 6
+    blt 0, .Lcall      # if (argc < 6) goto .Lcall
+
+    lwz 10, 20(4)      # r10 = p_argv[5]
+
+    cmpwi 0, 12, 7
+    blt 0, .Lcall      # if (argc < 7) goto .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+    addi 12, 12, -6    # argc -= 6
+    mtctr 12
+
+// These are set to 4 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF.
+    addi 4, 4, 20      # p_argv = p_argv + 5
+                       # (i.e. skip the 5 elements we already processed)
+    addi 12, 1, 52     # r12 = stack offset (56 - 4)
+
+.Lnext:
+    lwzu 0, 4(4)
+    stwu 0, 4(12)
+    bdnz .Lnext
+
+.Lcall:
+    stw 2, 20(1)     # Save the TOC pointer to the linkage area
+// Load the actual function address from the function descriptor.
+    lwz 12, 0(3)     # Function address
+    lwz 2, 4(3)      # TOC pointer
+    lwz 11, 8(3)     # Environment pointer
+
+    addi 3, 31, -12  # r3 = &gtid
+    addi 4, 31, -16  # r4 = &tid
+
+    mtctr 12         # CTR = function address
+    bctrl            # Branch to CTR
+    lwz 2, 20(1)     # Restore TOC pointer from linkage area
+
+#if OMPT_SUPPORT
+    li 3, 0
+    stw 3, 0(30)
+#endif
+
+    li 3, 1
+
+#if OMPT_SUPPORT
+    lwz 30, -8(31)   # Restore r30 from the saved value on the stack
+#endif
+
+    mr 1, 31
+    lwz 31, -4(1)    # Restore r31 from the saved value on the stack
+    lwz 0, 8(1)
+    mtlr 0           # Restore LR from the linkage area
+    blr              # Branch to LR
+
+#endif // KMP_ARCH_PPC64_XCOFF
+
+.Lfunc_end0:
+    .vbyte  4, 0x00000000           # Traceback table begin
+    .byte   0x00                    # Version = 0
+    .byte   0x09                    # Language = CPlusPlus
+    .byte   0x20                    # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue
+                                    # +HasTraceBackTableOffset, -IsInternalProcedure
+                                    # -HasControlledStorage, -IsTOCless
+                                    # -IsFloatingPointPresent
+                                    # -IsFloatingPointOperationLogOrAbortEnabled
+    .byte   0x61                    # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed
+                                    # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved
+    .byte   0x80                    # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0
+#if OMPT_SUPPORT
+    .byte   0x02                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2
+    .byte   0x06                    # NumberOfFixedParms = 6
+#else
+    .byte   0x01                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1
+    .byte   0x05                    # NumberOfFixedParms = 5
+#endif
+    .byte   0x01                    # NumberOfFPParms = 0, +HasParmsOnStack
+    .vbyte  4, 0x00000000           # Parameter type = i, i, i, i, i
+    .vbyte  4, .Lfunc_end0-.__kmp_invoke_microtask # Function size
+    .vbyte  2, 0x0016               # Function name len = 22
+    .byte   "__kmp_invoke_microtask" # Function Name
+    .byte   0x1f                    # AllocaRegister = 31
+                                    # -- End function
+
+// -- End  __kmp_invoke_microtask
+
+// Support for unnamed common blocks.
+
+    .comm .gomp_critical_user_, 32, 3
+#if KMP_ARCH_PPC64_XCOFF
+    .csect __kmp_unnamed_critical_addr[RW],3
+#else
+    .csect __kmp_unnamed_critical_addr[RW],2
+#endif
+    .globl __kmp_unnamed_critical_addr[RW]
+    .ptr .gomp_critical_user_
+
+// -- End unnamed common block
+
+    .toc
+
+#endif // KMP_OS_AIX
diff --git a/openmp/runtime/test/tasking/bug_nested_proxy_task.c b/openmp/runtime/test/tasking/bug_nested_proxy_task.c
index 43502bdcd1abd1..24fe1f3fe7607c 100644
--- a/openmp/runtime/test/tasking/bug_nested_proxy_task.c
+++ b/openmp/runtime/test/tasking/bug_nested_proxy_task.c
@@ -50,12 +50,21 @@ typedef struct kmp_depend_info {
      union {
         kmp_uint8 flag; // flag as an unsigned char
         struct { // flag as a set of 8 bits
-            unsigned in : 1;
-            unsigned out : 1;
-            unsigned mtx : 1;
-            unsigned set : 1;
-            unsigned unused : 3;
-            unsigned all : 1;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+          unsigned all : 1;
+          unsigned unused : 3;
+          unsigned set : 1;
+          unsigned mtx : 1;
+          unsigned out : 1;
+          unsigned in : 1;
+#else
+          unsigned in : 1;
+          unsigned out : 1;
+          unsigned mtx : 1;
+          unsigned set : 1;
+          unsigned unused : 3;
+          unsigned all : 1;
+#endif
         } flags;
      };
 } kmp_depend_info_t;
diff --git a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
index ff75df51aff077..688860c035728f 100644
--- a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
+++ b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -47,12 +47,21 @@ typedef struct kmp_depend_info {
      union {
         kmp_uint8 flag; // flag as an unsigned char
         struct { // flag as a set of 8 bits
-            unsigned in : 1;
-            unsigned out : 1;
-            unsigned mtx : 1;
-            unsigned set : 1;
-            unsigned unused : 3;
-            unsigned all : 1;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+          unsigned all : 1;
+          unsigned unused : 3;
+          unsigned set : 1;
+          unsigned mtx : 1;
+          unsigned out : 1;
+          unsigned in : 1;
+#else
+          unsigned in : 1;
+          unsigned out : 1;
+          unsigned mtx : 1;
+          unsigned set : 1;
+          unsigned unused : 3;
+          unsigned all : 1;
+#endif
         } flags;
     };
 } kmp_depend_info_t;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h
index 402ecf3ed553c9..ba57656cbac41d 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/common.h
+++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -17,9 +17,21 @@ typedef struct kmp_depend_info {
   union {
     unsigned char flag;
     struct {
-      bool in : 1;
-      bool out : 1;
-      bool mtx : 1;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+      unsigned all : 1;
+      unsigned unused : 3;
+      unsigned set : 1;
+      unsigned mtx : 1;
+      unsigned out : 1;
+      unsigned in : 1;
+#else
+      unsigned in : 1;
+      unsigned out : 1;
+      unsigned mtx : 1;
+      unsigned set : 1;
+      unsigned unused : 3;
+      unsigned all : 1;
+#endif
     } flags;
   };
 } kmp_depend_info_t;